Merge tag 'wireless-drivers-next-for-davem-2016-09-15' of git://git.kernel.org/pub...
author     David S. Miller <davem@davemloft.net>
           Sat, 17 Sep 2016 13:53:29 +0000 (09:53 -0400)
committer  David S. Miller <davem@davemloft.net>
           Sat, 17 Sep 2016 13:53:29 +0000 (09:53 -0400)
Kalle Valo says:

====================
wireless-drivers-next patches for 4.9

Major changes:

iwlwifi

* preparation for new a000 HW continues
* some DQA improvements
* add support for GMAC
* add support for 9460, 9270 and 9170 series

mwifiex

* support random MAC address for scanning
* add HT aggregation support for adhoc mode
* add custom regulatory domain support
* add manufacturing mode support via nl80211 testmode interface

bcma

* support BCM53573 series of wireless SoCs

bitfield.h

* add FIELD_PREP() and FIELD_GET() macros

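A rough sketch of how these macros compose, using an invented register
layout (the RATE_* masks and function names below are illustrative only,
not taken from any driver):

        #include <linux/bitfield.h>
        #include <linux/bitops.h>
        #include <linux/types.h>

        #define RATE_MCS        GENMASK(6, 0)   /* bits 0..6:  MCS index */
        #define RATE_NSS        GENMASK(10, 8)  /* bits 8..10: spatial streams */

        static u32 rate_pack(u8 mcs, u8 nss)
        {
                /* FIELD_PREP() shifts the value into the mask's bit position. */
                return FIELD_PREP(RATE_MCS, mcs) | FIELD_PREP(RATE_NSS, nss);
        }

        static u8 rate_mcs(u32 rate)
        {
                /* FIELD_GET() masks the register and shifts the field back down. */
                return FIELD_GET(RATE_MCS, rate);
        }
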
mt7601u

* convert to use the new bitfield.h macros

brcmfmac

* add support for bcm4339 chip with modalias sdio:c00v02D0d4339

ath10k

* add nl80211 testmode support for 10.4 firmware
* hide kernel addresses from logs using %pK format specifier (sketched below)
* implement NAPI support
* enable peer stats by default

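The %pK change above amounts to logging pointers like this (the function
and variable names here are illustrative, not actual ath10k code):

        #include <linux/printk.h>

        static void log_ring_addr(void *vaddr)
        {
                /*
                 * %pK honours the kptr_restrict sysctl: unprivileged
                 * readers see zeros instead of the raw kernel address,
                 * which a plain %p would print.
                 */
                pr_info("ring vaddr %pK\n", vaddr);
        }
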
ath9k

* use ieee80211_tx_status_noskb where possible

wil6210

* extract firmware capabilities from the firmware file

ath6kl

* enable firmware crash dumps on the AR6004

ath-current is also merged to fix a conflict in ath10k.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
1485 files changed:
.mailmap
Documentation/ABI/stable/sysfs-devices
Documentation/PCI/MSI-HOWTO.txt
Documentation/PCI/pci.txt
Documentation/arm/CCN.txt
Documentation/arm64/silicon-errata.txt
Documentation/conf.py
Documentation/cpu-freq/cpufreq-stats.txt
Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
Documentation/devicetree/bindings/net/dsa/qca8k.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/macb.txt
Documentation/devicetree/bindings/net/qcom-emac.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/rockchip-dwmac.txt
Documentation/devicetree/bindings/net/smsc911x.txt
Documentation/devicetree/bindings/net/stm32-dwmac.txt [new file with mode: 0644]
Documentation/devicetree/bindings/serial/8250.txt
Documentation/devicetree/bindings/sound/omap-mcpdm.txt
Documentation/devicetree/bindings/thermal/thermal.txt
Documentation/filesystems/overlayfs.txt
Documentation/hwmon/ftsteutates
Documentation/i2c/slave-interface
Documentation/kernel-documentation.rst
Documentation/kernel-parameters.txt
Documentation/networking/dsa/dsa.txt
Documentation/networking/ip-sysctl.txt
Documentation/networking/rxrpc.txt
Documentation/networking/switchdev.txt
Documentation/power/basic-pm-debugging.txt
Documentation/power/interface.txt
Documentation/powerpc/transactional_memory.txt
Documentation/rapidio/mport_cdev.txt
Documentation/sphinx-static/theme_overrides.css
MAINTAINERS
Makefile
arch/Kconfig
arch/arc/include/asm/entry.h
arch/arc/include/asm/irqflags-compact.h
arch/arc/include/asm/pgtable.h
arch/arc/include/uapi/asm/elf.h
arch/arc/kernel/arcksyms.c
arch/arc/kernel/process.c
arch/arc/kernel/setup.c
arch/arc/mm/cache.c
arch/arc/mm/highmem.c
arch/arm/boot/dts/am335x-baltos.dtsi
arch/arm/boot/dts/am335x-igep0033.dtsi
arch/arm/boot/dts/am335x-phycore-som.dtsi
arch/arm/boot/dts/armada-388-clearfog.dts
arch/arm/boot/dts/exynos5410-odroidxu.dts
arch/arm/boot/dts/imx6qdl.dtsi
arch/arm/boot/dts/imx6sx-sabreauto.dts
arch/arm/boot/dts/imx7d-sdb.dts
arch/arm/boot/dts/kirkwood-ib62x0.dts
arch/arm/boot/dts/kirkwood-openrd.dtsi
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap3-overo-base.dtsi
arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
arch/arm/boot/dts/rk3066a.dtsi
arch/arm/boot/dts/rk3288.dtsi
arch/arm/boot/dts/rk3xxx.dtsi
arch/arm/boot/dts/sun5i-a13.dtsi
arch/arm/boot/dts/tegra114-dalmore.dts
arch/arm/boot/dts/tegra114-roth.dts
arch/arm/boot/dts/tegra114-tn7.dts
arch/arm/boot/dts/tegra124-jetson-tk1.dts
arch/arm/kernel/entry-armv.S
arch/arm/kernel/hyp-stub.S
arch/arm/kvm/mmu.c
arch/arm/mach-imx/gpc.c
arch/arm/mach-imx/mach-imx6ul.c
arch/arm/mach-imx/pm-imx6.c
arch/arm/mach-omap2/cm33xx.c
arch/arm/mach-omap2/cminst44xx.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/omap_hwmod.h
arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
arch/arm/mach-pxa/idp.c
arch/arm/mach-pxa/xcep.c
arch/arm/mach-realview/core.c
arch/arm/mach-sa1100/clock.c
arch/arm/mach-sa1100/generic.c
arch/arm/mach-sa1100/generic.h
arch/arm/mach-sa1100/pleb.c
arch/arm/mm/mmu.c
arch/arm/mm/proc-v7.S
arch/arm/xen/enlighten.c
arch/arm64/boot/dts/rockchip/rk3368.dtsi
arch/arm64/include/asm/percpu.h
arch/arm64/include/asm/spinlock.h
arch/arm64/kernel/head.S
arch/arm64/kernel/sleep.S
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/sys_regs.c
arch/arm64/mm/dump.c
arch/arm64/mm/numa.c
arch/arm64/mm/proc.S
arch/blackfin/mach-bf561/boards/cm_bf561.c
arch/blackfin/mach-bf561/boards/ezkit.c
arch/ia64/include/asm/uaccess.h
arch/mips/include/asm/page.h
arch/mips/kvm/mmu.c
arch/parisc/Kconfig
arch/parisc/configs/c8000_defconfig
arch/parisc/configs/generic-64bit_defconfig
arch/parisc/include/asm/uaccess.h
arch/parisc/include/uapi/asm/errno.h
arch/parisc/kernel/processor.c
arch/parisc/kernel/time.c
arch/powerpc/include/asm/cputhreads.h
arch/powerpc/include/asm/hmi.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pci-bridge.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/hmi.c [deleted file]
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/signal_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/traps.c
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s_hv_hmi.c [new file with mode: 0644]
arch/powerpc/lib/checksum_32.S
arch/powerpc/mm/fault.c
arch/powerpc/mm/slb_low.S
arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
arch/powerpc/platforms/embedded6xx/holly.c
arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
arch/powerpc/platforms/powernv/opal-dump.c
arch/powerpc/platforms/powernv/opal-elog.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/pseries/pci.c
arch/powerpc/platforms/pseries/pci_dlpar.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/cpm1.c
arch/powerpc/sysdev/cpm_common.c
arch/powerpc/sysdev/fsl_rio.c
arch/powerpc/sysdev/xics/icp-opal.c
arch/s390/Kconfig
arch/s390/configs/default_defconfig
arch/s390/configs/gcov_defconfig
arch/s390/configs/performance_defconfig
arch/s390/defconfig
arch/s390/include/asm/uaccess.h
arch/s390/kernel/setup.c
arch/sparc/include/asm/uaccess_32.h
arch/sparc/include/asm/uaccess_64.h
arch/tile/Kconfig
arch/tile/include/asm/uaccess.h
arch/um/include/asm/common.lds.S
arch/um/kernel/skas/syscall.c
arch/x86/Kconfig
arch/x86/configs/tiny.config
arch/x86/crypto/sha256-mb/sha256_mb.c
arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
arch/x86/crypto/sha512-mb/sha512_mb.c
arch/x86/include/asm/uaccess.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/smpboot.c
arch/x86/kvm/vmx.c
arch/x86/mm/kaslr.c
arch/x86/mm/pat.c
arch/x86/pci/vmd.c
arch/x86/power/hibernate_64.c
arch/x86/um/ptrace_32.c
arch/x86/um/ptrace_64.c
arch/x86/xen/enlighten.c
block/bio.c
block/blk-core.c
block/blk-merge.c
block/blk-mq.c
block/elevator.c
crypto/cryptd.c
drivers/acpi/nfit/mce.c
drivers/acpi/scan.c
drivers/ata/libahci.c
drivers/ata/pata_ninja32.c
drivers/atm/eni.c
drivers/atm/fore200e.c
drivers/atm/he.c
drivers/atm/iphase.c
drivers/atm/nicstar.c
drivers/atm/zatm.c
drivers/base/power/runtime.c
drivers/base/regmap/regcache-rbtree.c
drivers/base/regmap/regcache.c
drivers/base/regmap/regmap.c
drivers/block/floppy.c
drivers/block/xen-blkfront.c
drivers/bus/arm-cci.c
drivers/bus/arm-ccn.c
drivers/bus/vexpress-config.c
drivers/char/hw_random/Kconfig
drivers/char/tpm/tpm2-cmd.c
drivers/char/virtio_console.c
drivers/clk/renesas/r8a7795-cpg-mssr.c
drivers/clk/rockchip/clk-rk3399.c
drivers/clk/sunxi-ng/ccu_common.c
drivers/clk/tegra/clk-tegra114.c
drivers/clocksource/bcm_kona_timer.c
drivers/clocksource/mips-gic-timer.c
drivers/clocksource/pxa_timer.c
drivers/clocksource/sun4i_timer.c
drivers/clocksource/time-armada-370-xp.c
drivers/clocksource/time-pistachio.c
drivers/clocksource/timer-atmel-pit.c
drivers/cpufreq/cpufreq-dt-platdev.c
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/caam/caamalg.c
drivers/crypto/chelsio/Kconfig [new file with mode: 0644]
drivers/crypto/chelsio/Makefile [new file with mode: 0644]
drivers/crypto/chelsio/chcr_algo.c [new file with mode: 0644]
drivers/crypto/chelsio/chcr_algo.h [new file with mode: 0644]
drivers/crypto/chelsio/chcr_core.c [new file with mode: 0644]
drivers/crypto/chelsio/chcr_core.h [new file with mode: 0644]
drivers/crypto/chelsio/chcr_crypto.h [new file with mode: 0644]
drivers/crypto/qat/qat_common/qat_algs.c
drivers/crypto/vmx/aes_xts.c
drivers/dax/dax.c
drivers/dax/pmem.c
drivers/dma/at_xdmac.c
drivers/dma/fsl_raid.c
drivers/dma/img-mdc-dma.c
drivers/dma/pxa_dma.c
drivers/dma/sh/usb-dmac.c
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/skx_edac.c [new file with mode: 0644]
drivers/firmware/arm_scpi.c
drivers/firmware/dmi-id.c
drivers/gpio/Kconfig
drivers/gpio/gpio-max730x.c
drivers/gpio/gpio-mcp23s08.c
drivers/gpio/gpio-sa1100.c
drivers/gpio/gpiolib-of.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_csr.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_fbc.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/imx/imx-drm-core.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/mediatek/Kconfig
drivers/gpu/drm/msm/msm_drv.h
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/nouveau/nouveau_acpi.c
drivers/gpu/drm/qxl/qxl_fb.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/tegra/dsi.c
drivers/gpu/drm/udl/udl_fb.c
drivers/gpu/drm/vc4/vc4_drv.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/gpu/host1x/mipi.c
drivers/hwmon/it87.c
drivers/i2c/busses/i2c-at91.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-bcm-kona.c
drivers/i2c/busses/i2c-brcmstb.c
drivers/i2c/busses/i2c-cadence.c
drivers/i2c/busses/i2c-cros-ec-tunnel.c
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-meson.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-rcar.c
drivers/i2c/busses/i2c-rk3x.c
drivers/i2c/busses/i2c-sh_mobile.c
drivers/i2c/muxes/i2c-demux-pinctrl.c
drivers/iio/accel/Kconfig
drivers/iio/accel/bma220_spi.c
drivers/iio/accel/bmc150-accel-core.c
drivers/iio/accel/kxsd9.c
drivers/iio/adc/Kconfig
drivers/iio/adc/ad799x.c
drivers/iio/adc/at91_adc.c
drivers/iio/adc/rockchip_saradc.c
drivers/iio/adc/ti-ads1015.c
drivers/iio/adc/ti_am335x_adc.c
drivers/iio/chemical/atlas-ph-sensor.c
drivers/iio/common/hid-sensors/hid-sensor-attributes.c
drivers/iio/dac/stx104.c
drivers/iio/humidity/Kconfig
drivers/iio/humidity/am2315.c
drivers/iio/humidity/hdc100x.c
drivers/iio/industrialio-buffer.c
drivers/iio/industrialio-core.c
drivers/iio/light/Kconfig
drivers/iio/pressure/bmp280-core.c
drivers/iio/proximity/as3935.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/multicast.c
drivers/infiniband/hw/cxgb4/Kconfig
drivers/infiniband/hw/cxgb4/Makefile
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/debugfs.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/pio_copy.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/qsfp.c
drivers/infiniband/hw/hfi1/qsfp.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_hw.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_utils.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qib/qib_debugfs.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/rmi4/rmi_driver.c
drivers/input/serio/i8042.c
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/silead.c
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-mips-gic.c
drivers/isdn/hardware/mISDN/avmfritz.c
drivers/isdn/hardware/mISDN/hfcmulti.c
drivers/isdn/hardware/mISDN/mISDNipac.c
drivers/isdn/hardware/mISDN/w6692.c
drivers/macintosh/ams/ams-i2c.c
drivers/macintosh/windfarm_pm112.c
drivers/macintosh/windfarm_pm72.c
drivers/macintosh/windfarm_rm31.c
drivers/mailbox/Kconfig
drivers/mailbox/bcm-pdc-mailbox.c
drivers/md/bcache/super.c
drivers/md/bitmap.c
drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-flakey.c
drivers/md/dm-log-writes.c
drivers/md/dm-log.c
drivers/md/dm-raid.c
drivers/md/dm-round-robin.c
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/memory/omap-gpmc.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/bh1780gli.c [deleted file]
drivers/misc/cxl/vphb.c
drivers/misc/lkdtm_rodata.c
drivers/misc/lkdtm_usercopy.c
drivers/misc/mei/hw-me.c
drivers/misc/mei/pci-me.c
drivers/mmc/card/block.c
drivers/mmc/card/queue.c
drivers/mmc/card/queue.h
drivers/net/bonding/bond_main.c
drivers/net/dsa/Kconfig
drivers/net/dsa/Makefile
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/b53/b53_regs.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/bcm_sf2.h
drivers/net/dsa/bcm_sf2_regs.h
drivers/net/dsa/mv88e6060.c
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/Makefile
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global2.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/global2.h [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
drivers/net/dsa/qca8k.c [new file with mode: 0644]
drivers/net/dsa/qca8k.h [new file with mode: 0644]
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/adaptec/starfire.c
drivers/net/ethernet/agere/et131x.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amd/7990.c
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
drivers/net/ethernet/amd/xgbe/xgbe-main.c
drivers/net/ethernet/amd/xgbe/xgbe.h
drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
drivers/net/ethernet/arc/emac_mdio.c
drivers/net/ethernet/atheros/alx/alx.h
drivers/net/ethernet/atheros/alx/hw.c
drivers/net/ethernet/atheros/alx/hw.h
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/atheros/alx/reg.h
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bgmac-bcma.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bgmac.h
drivers/net/ethernet/broadcom/bnx2.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/liquidio/Makefile
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
drivers/net/ethernet/cavium/liquidio/lio_core.c [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/octeon_config.h
drivers/net/ethernet/cavium/liquidio/octeon_console.c
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
drivers/net/ethernet/cavium/liquidio/octeon_iq.h
drivers/net/ethernet/cavium/liquidio/octeon_main.h
drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/cavium/liquidio/octeon_nic.c
drivers/net/ethernet/cavium/liquidio/octeon_nic.h
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.c
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nic_reg.h
drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/chelsio/cxgb4/Makefile
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/sched.c [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/sched.h [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/chelsio/libcxgb/Makefile
drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c [new file with mode: 0644]
drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h [new file with mode: 0644]
drivers/net/ethernet/dlink/sundance.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/emulex/benet/be_ethtool.c
drivers/net/ethernet/emulex/benet/be_hw.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/fman/fman_mac.h
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/fs_enet/fs_enet.h
drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
drivers/net/ethernet/freescale/fs_enet/mac-fec.c
drivers/net/ethernet/freescale/fs_enet/mac-scc.c
drivers/net/ethernet/freescale/fsl_pq_mdio.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/freescale/gianfar.h
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hisi_femac.c
drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/i825xx/82596.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/emac/mal.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/fm10k/fm10k.h
drivers/net/ethernet/intel/fm10k/fm10k_common.c
drivers/net/ethernet/intel/fm10k/fm10k_common.h
drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/fm10k/fm10k_main.c
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/intel/fm10k/fm10k_pf.c
drivers/net/ethernet/intel/fm10k/fm10k_type.h
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_client.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/igb/e1000_82575.h
drivers/net/ethernet/intel/igb/e1000_defines.h
drivers/net/ethernet/intel/igb/e1000_regs.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/intel/ixgbevf/vf.c
drivers/net/ethernet/marvell/mvneta_bm.h
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_eth_soc.h
drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/mad.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mcg.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/pd.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/srq.c
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/uar.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/natsemi/ns83820.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
drivers/net/ethernet/nuvoton/w90p910_ether.c
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/qlogic/qed/Makefile
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_debug.c [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_debug.h [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_hw.c
drivers/net/ethernet/qlogic/qed/qed_init_ops.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_l2.h
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
drivers/net/ethernet/qlogic/qed/qed_selftest.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qed/qed_vf.c
drivers/net/ethernet/qlogic/qed/qed_vf.h
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/qualcomm/Kconfig
drivers/net/ethernet/qualcomm/Makefile
drivers/net/ethernet/qualcomm/emac/Makefile [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-mac.c [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-mac.h [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-phy.c [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-phy.h [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac-sgmii.h [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac.c [new file with mode: 0644]
drivers/net/ethernet/qualcomm/emac/emac.h [new file with mode: 0644]
drivers/net/ethernet/realtek/8139cp.c
drivers/net/ethernet/renesas/ravb.h
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/falcon_boards.c
drivers/net/ethernet/sfc/farch.c
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/nic.c
drivers/net/ethernet/sfc/nic.h
drivers/net/ethernet/sfc/selftest.c
drivers/net/ethernet/sfc/selftest.h
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/sis/sis900.h
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/smsc/smc91x.h
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/Makefile
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c [new file with mode: 0644]
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/synopsys/dwc_eth_qos.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/ti/cpmac.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/davinci_cpdma.h
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ethernet/toshiba/ps3_gelic_net.c
drivers/net/ethernet/via/via-velocity.c
drivers/net/ethernet/xilinx/Kconfig
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/hamradio/bpqether.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/macsec.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/micrel.c
drivers/net/phy/mscc.c
drivers/net/phy/phy.c
drivers/net/phy/xilinx_gmii2rgmii.c
drivers/net/ppp/ppp_generic.c
drivers/net/team/team_mode_loadbalance.c
drivers/net/tun.c
drivers/net/usb/asix.h
drivers/net/usb/asix_common.c
drivers/net/usb/asix_devices.c
drivers/net/usb/ax88172a.c
drivers/net/usb/hso.c
drivers/net/usb/kaweth.c
drivers/net/usb/lan78xx.c
drivers/net/usb/pegasus.c
drivers/net/usb/r8152.c
drivers/net/usb/smsc95xx.c
drivers/net/usb/smsc95xx.h
drivers/net/veth.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wan/sbni.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
drivers/net/wireless/intel/iwlegacy/common.h
drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/xen-netback/common.h
drivers/net/xen-netback/hash.c
drivers/net/xen-netback/xenbus.c
drivers/nvdimm/bus.c
drivers/nvme/host/Kconfig
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/rdma.c
drivers/nvme/target/Kconfig
drivers/nvme/target/loop.c
drivers/nvme/target/rdma.c
drivers/of/base.c
drivers/of/fdt.c
drivers/of/irq.c
drivers/of/platform.c
drivers/pci/host-bridge.c
drivers/pci/msi.c
drivers/pci/quirks.c
drivers/perf/arm_pmu.c
drivers/phy/phy-brcm-sata.c
drivers/phy/phy-sun4i-usb.c
drivers/phy/phy-sun9i-usb.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/pinctrl-pistachio.c
drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
drivers/platform/olpc/olpc-ec.c
drivers/platform/x86/intel_pmic_gpio.c
drivers/ptp/ptp_ixp46x.c
drivers/rapidio/devices/tsi721.c
drivers/regulator/max14577-regulator.c
drivers/regulator/max77693-regulator.c
drivers/regulator/qcom_smd-regulator.c
drivers/scsi/aacraid/commctrl.c
drivers/scsi/constants.c
drivers/scsi/fcoe/fcoe_ctlr.c
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_transport_sas.c
drivers/scsi/ses.c
drivers/scsi/wd719x.c
drivers/spi/spi-img-spfi.c
drivers/spi/spi-mt65xx.c
drivers/spi/spi-pxa2xx-pci.c
drivers/spi/spi-qup.c
drivers/spi/spi-sh-msiof.c
drivers/spi/spi.c
drivers/staging/comedi/drivers/adv_pci1760.c
drivers/staging/comedi/drivers/comedi_test.c
drivers/staging/comedi/drivers/daqboard2000.c
drivers/staging/comedi/drivers/dt2811.c
drivers/staging/comedi/drivers/ni_mio_common.c
drivers/staging/iio/impedance-analyzer/ad5933.c
drivers/staging/lustre/lustre/llite/namei.c
drivers/staging/wilc1000/host_interface.c
drivers/staging/wilc1000/linux_wlan.c
drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
drivers/target/iscsi/cxgbit/cxgbit_cm.c
drivers/thermal/cpu_cooling.c
drivers/thermal/imx_thermal.c
drivers/thermal/int340x_thermal/int3406_thermal.c
drivers/thermal/rcar_thermal.c
drivers/thunderbolt/nhi.c
drivers/thunderbolt/switch.c
drivers/tty/serial/8250/8250.h
drivers/tty/serial/8250/8250_core.c
drivers/tty/serial/8250/8250_fintek.c
drivers/tty/serial/8250/8250_mid.c
drivers/tty/serial/8250/8250_omap.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/8250/Kconfig
drivers/usb/chipidea/udc.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/core/config.c
drivers/usb/core/devio.c
drivers/usb/core/hub.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/platform.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/debug.h
drivers/usb/dwc3/dwc3-of-simple.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/composite.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_eem.c
drivers/usb/gadget/function/f_rndis.c
drivers/usb/gadget/function/rndis.c
drivers/usb/gadget/function/u_ether.c
drivers/usb/gadget/function/u_serial.c
drivers/usb/gadget/function/uvc_configfs.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/udc/core.c
drivers/usb/gadget/udc/fsl_qe_udc.c
drivers/usb/gadget/udc/renesas_usb3.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/max3421-hcd.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/misc/ftdi-elan.c
drivers/usb/misc/usbtest.c
drivers/usb/musb/musb_virthub.c
drivers/usb/phy/phy-generic.c
drivers/usb/phy/phy-omap-otg.c
drivers/usb/renesas_usbhs/common.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/renesas_usbhs/mod.c
drivers/usb/renesas_usbhs/mod_gadget.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/mos7720.c
drivers/usb/serial/mos7840.c
drivers/usb/serial/option.c
drivers/usb/serial/usb-serial.c
drivers/vhost/scsi.c
drivers/virtio/virtio_ring.c
drivers/xen/xenbus/xenbus_dev_frontend.c
fs/afs/callback.c
fs/afs/cmservice.c
fs/afs/flock.c
fs/afs/fsclient.c
fs/afs/internal.h
fs/afs/main.c
fs/afs/rxrpc.c
fs/afs/server.c
fs/afs/vlclient.c
fs/afs/vlocation.c
fs/binfmt_elf.c
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/volumes.c
fs/ceph/dir.c
fs/crypto/policy.c
fs/devpts/inode.c
fs/dlm/debug_fs.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/node.c
fs/f2fs/super.c
fs/fuse/file.c
fs/iomap.c
fs/kernfs/file.c
fs/nfs/blocklayout/blocklayout.c
fs/nfs/blocklayout/blocklayout.h
fs/nfs/blocklayout/extent_tree.c
fs/nfs/callback.c
fs/nfs/callback_proc.c
fs/nfs/client.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayout.h
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/internal.h
fs/nfs/nfs42proc.c
fs/nfs/nfs4client.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4session.c
fs/nfs/nfs4session.h
fs/nfs/pnfs.c
fs/nfs/super.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/overlayfs.h
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/proc/base.c
fs/proc/task_mmu.c
fs/seq_file.c
fs/sysfs/file.c
fs/ubifs/tnc_commit.c
fs/ubifs/xattr.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_rmap_btree.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_file.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_iops.c
fs/xfs/xfs_super.c
fs/xfs/xfs_trace.h
include/asm-generic/uaccess.h
include/linux/acpi.h
include/linux/bcma/bcma_regs.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/bpf.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/fence.h
include/linux/filter.h
include/linux/fs.h
include/linux/fscrypto.h
include/linux/host1x.h
include/linux/hyperv.h
include/linux/if_bridge.h
include/linux/if_vlan.h
include/linux/iio/sw_trigger.h
include/linux/inet_diag.h
include/linux/iomap.h
include/linux/irqchip/arm-gic-v3.h
include/linux/mempolicy.h
include/linux/mfd/da8xx-cfgchip.h [new file with mode: 0644]
include/linux/mfd/ti_am335x_tscadc.h
include/linux/mlx5/cq.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/mlx5/qp.h
include/linux/mlx5/vport.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/net.h
include/linux/netdevice.h
include/linux/netfilter/nfnetlink_acct.h
include/linux/nvme.h
include/linux/pci.h
include/linux/perf_event.h
include/linux/qed/common_hsi.h
include/linux/qed/eth_common.h
include/linux/qed/iscsi_common.h
include/linux/qed/qed_chain.h
include/linux/qed/qed_eth_if.h
include/linux/qed/qed_if.h
include/linux/qed/tcp_common.h
include/linux/rhashtable.h
include/linux/rtnetlink.h
include/linux/serial_8250.h
include/linux/skbuff.h
include/linux/smc91x.h
include/linux/sysctl.h
include/linux/tcp.h
include/linux/thread_info.h
include/net/af_rxrpc.h
include/net/af_unix.h
include/net/cfg80211.h
include/net/devlink.h
include/net/dsa.h
include/net/dst_metadata.h
include/net/flow.h
include/net/flow_dissector.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/l3mdev.h
include/net/lwtunnel.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_core.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_conntrack_l4proto.h
include/net/netfilter/nf_log.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nft_meta.h
include/net/netfilter/nft_reject.h
include/net/netns/conntrack.h
include/net/netns/xfrm.h
include/net/pkt_cls.h
include/net/route.h
include/net/sch_generic.h
include/net/sock.h
include/net/strparser.h
include/net/switchdev.h
include/net/tc_act/tc_skbmod.h [new file with mode: 0644]
include/net/tc_act/tc_tunnel_key.h [new file with mode: 0644]
include/net/tc_act/tc_vlan.h
include/net/tcp.h
include/net/udp.h
include/net/vxlan.h
include/net/xfrm.h
include/rdma/ib_verbs.h
include/rxrpc/packet.h
include/scsi/scsi_transport_sas.h
include/trace/events/rxrpc.h [new file with mode: 0644]
include/uapi/linux/Kbuild
include/uapi/linux/atm_zatm.h
include/uapi/linux/bpf.h
include/uapi/linux/bpf_perf_event.h [new file with mode: 0644]
include/uapi/linux/ethtool.h
include/uapi/linux/if_bridge.h
include/uapi/linux/if_link.h
include/uapi/linux/if_pppol2tp.h
include/uapi/linux/if_pppox.h
include/uapi/linux/if_tunnel.h
include/uapi/linux/inet_diag.h
include/uapi/linux/ipx.h
include/uapi/linux/libc-compat.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/openvswitch.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/snmp.h
include/uapi/linux/tc_act/tc_skbmod.h [new file with mode: 0644]
include/uapi/linux/tc_act/tc_tunnel_key.h [new file with mode: 0644]
include/uapi/linux/tc_act/tc_vlan.h
include/uapi/linux/tipc_netlink.h
include/xen/xen-ops.h
kernel/audit_watch.c
kernel/bpf/core.c
kernel/bpf/helpers.c
kernel/bpf/stackmap.c
kernel/bpf/verifier.c
kernel/configs/tiny.config
kernel/cpuset.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/irq/affinity.c
kernel/irq/chip.c
kernel/irq/manage.c
kernel/kexec_file.c
kernel/memremap.c
kernel/power/qos.c
kernel/power/snapshot.c
kernel/printk/braille.c
kernel/printk/nmi.c
kernel/sched/cputime.c
kernel/seccomp.c
kernel/sysctl.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timekeeping_debug.c
kernel/trace/blktrace.c
kernel/trace/bpf_trace.c
lib/Kconfig.debug
lib/Makefile
lib/rhashtable.c
lib/test_bpf.c
lib/test_hash.c
lib/usercopy.c [deleted file]
mm/Kconfig
mm/huge_memory.c
mm/memcontrol.c
mm/mempolicy.c
mm/page_alloc.c
mm/readahead.c
mm/usercopy.c
mm/vmscan.c
net/appletalk/ddp.c
net/atm/lec.c
net/atm/mpc.c
net/batman-adv/Kconfig
net/batman-adv/Makefile
net/batman-adv/bat_algo.c
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/debugfs.h
net/batman-adv/distributed-arp-table.c
net/batman-adv/gateway_client.c
net/batman-adv/hard-interface.c
net/batman-adv/icmp_socket.h
net/batman-adv/main.c
net/batman-adv/multicast.c
net/batman-adv/netlink.c
net/batman-adv/network-coding.c
net/batman-adv/originator.c
net/batman-adv/soft-interface.c
net/batman-adv/translation-table.c
net/batman-adv/tvlv.c
net/batman-adv/types.h
net/bluetooth/af_bluetooth.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sock.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bridge/Makefile
net/bridge/br_device.c
net/bridge/br_fdb.c
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_input.c
net/bridge/br_multicast.c
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_stp_if.c
net/bridge/br_switchdev.c [new file with mode: 0644]
net/bridge/br_sysfs_if.c
net/bridge/netfilter/ebtables.c
net/bridge/netfilter/nf_log_bridge.c
net/bridge/netfilter/nft_meta_bridge.c
net/core/dev.c
net/core/drop_monitor.c
net/core/filter.c
net/core/flow_dissector.c
net/core/lwtunnel.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/dsa/Kconfig
net/dsa/Makefile
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/slave.c
net/dsa/tag_qca.c [new file with mode: 0644]
net/ipv4/af_inet.c
net/ipv4/devinet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/fou.c
net/ipv4/inet_diag.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c [deleted file]
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
net/ipv4/netfilter/nf_dup_ipv4.c
net/ipv4/netfilter/nf_log_arp.c
net/ipv4/netfilter/nf_log_ipv4.c
net/ipv4/netfilter/nft_reject_ipv4.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_diag.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_yeah.c
net/ipv4/udp.c
net/ipv4/udp_diag.c
net/ipv4/udplite.c
net/ipv4/xfrm4_policy.c
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/fib6_rules.c
net/ipv6/ila/ila_lwt.c
net/ipv6/ila/ila_xlat.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ndisc.c
net/ipv6/netfilter/nf_log_ipv6.c
net/ipv6/netfilter/nft_reject_ipv6.c
net/ipv6/output_core.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udp_impl.h
net/ipv6/udplite.c
net/ipv6/xfrm6_input.c
net/ipv6/xfrm6_policy.c
net/irda/af_irda.c
net/kcm/kcmproc.c
net/kcm/kcmsock.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_eth.c
net/l2tp/l2tp_netlink.c
net/l2tp/l2tp_ppp.c
net/l3mdev/l3mdev.c
net/mac80211/tdls.c
net/mpls/af_mpls.c
net/mpls/mpls_gso.c
net/mpls/mpls_iptunnel.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/ipvs/ip_vs_nfct.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_pptp.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_generic.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_proto_udp.c
net/netfilter/nf_conntrack_proto_udplite.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_log.c
net/netfilter/nf_nat_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_netdev.c
net/netfilter/nfnetlink_acct.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nfnetlink_log.c
net/netfilter/nft_hash.c
net/netfilter/nft_meta.c
net/netfilter/nft_numgen.c [new file with mode: 0644]
net/netfilter/nft_quota.c [new file with mode: 0644]
net/netfilter/nft_rbtree.c [deleted file]
net/netfilter/nft_reject.c
net/netfilter/nft_reject_inet.c
net/netfilter/nft_set_hash.c [new file with mode: 0644]
net/netfilter/nft_set_rbtree.c [new file with mode: 0644]
net/netfilter/xt_TPROXY.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_nfacct.c
net/netfilter/xt_physdev.c
net/netlink/diag.c
net/netlink/genetlink.c
net/openvswitch/actions.c
net/openvswitch/conntrack.c
net/openvswitch/datapath.c
net/openvswitch/flow.c
net/openvswitch/flow.h
net/openvswitch/flow_netlink.c
net/openvswitch/vport.c
net/rxrpc/Kconfig
net/rxrpc/Makefile
net/rxrpc/af_rxrpc.c
net/rxrpc/ar-internal.h
net/rxrpc/call_accept.c
net/rxrpc/call_event.c
net/rxrpc/call_object.c
net/rxrpc/conn_client.c
net/rxrpc/conn_event.c
net/rxrpc/conn_object.c
net/rxrpc/conn_service.c
net/rxrpc/input.c
net/rxrpc/insecure.c
net/rxrpc/local_event.c
net/rxrpc/local_object.c
net/rxrpc/misc.c
net/rxrpc/output.c
net/rxrpc/peer_event.c
net/rxrpc/peer_object.c
net/rxrpc/proc.c
net/rxrpc/recvmsg.c
net/rxrpc/rxkad.c
net/rxrpc/security.c
net/rxrpc/sendmsg.c [new file with mode: 0644]
net/rxrpc/skbuff.c
net/rxrpc/sysctl.c
net/rxrpc/utils.c
net/sched/Kconfig
net/sched/Makefile
net/sched/act_bpf.c
net/sched/act_ife.c
net/sched/act_skbmod.c [new file with mode: 0644]
net/sched/act_tunnel_key.c [new file with mode: 0644]
net/sched/act_vlan.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sctp/auth.c
net/sctp/input.c
net/sctp/inqueue.c
net/sctp/output.c
net/sctp/sctp_diag.c
net/strparser/strparser.c
net/sunrpc/clnt.c
net/switchdev/switchdev.c
net/tipc/bcast.c
net/tipc/bcast.h
net/tipc/bearer.c
net/tipc/bearer.h
net/tipc/link.c
net/tipc/link.h
net/tipc/msg.h
net/tipc/name_distr.c
net/tipc/net.h
net/tipc/netlink.c
net/tipc/node.c
net/tipc/node.h
net/tipc/udp_media.c
net/tipc/udp_media.h [new file with mode: 0644]
net/unix/af_unix.c
net/wireless/wext-core.c
net/x25/af_x25.c
net/xfrm/xfrm_algo.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_replay.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_sysctl.c
net/xfrm/xfrm_user.c
samples/bpf/Makefile
samples/bpf/bpf_helpers.h
samples/bpf/bpf_load.c
samples/bpf/sampleip_kern.c [new file with mode: 0644]
samples/bpf/sampleip_user.c [new file with mode: 0644]
samples/bpf/tcbpf2_kern.c [new file with mode: 0644]
samples/bpf/test_tunnel_bpf.sh [new file with mode: 0755]
samples/bpf/test_verifier.c
samples/bpf/trace_event_kern.c [new file with mode: 0644]
samples/bpf/trace_event_user.c [new file with mode: 0644]
scripts/checkpatch.pl
scripts/get_maintainer.pl
scripts/package/builddeb
scripts/tags.sh
security/Kconfig
sound/core/rawmidi.c
sound/core/timer.c
sound/firewire/fireworks/fireworks.h
sound/firewire/fireworks/fireworks_hwdep.c
sound/firewire/fireworks/fireworks_proc.c
sound/firewire/fireworks/fireworks_transaction.c
sound/firewire/tascam/tascam-hwdep.c
sound/pci/hda/patch_realtek.c
sound/soc/atmel/atmel_ssc_dai.c
sound/soc/codecs/da7213.c
sound/soc/codecs/max98371.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/wm2000.c
sound/soc/generic/Makefile
sound/soc/generic/simple-card-utils.c
sound/soc/intel/skylake/skl-sst-utils.c
sound/soc/intel/skylake/skl.c
sound/soc/omap/omap-abe-twl6040.c
sound/soc/omap/omap-mcpdm.c
sound/soc/samsung/s3c24xx_uda134x.c
sound/soc/sh/rcar/src.c
sound/soc/soc-compress.c
sound/soc/soc-core.c
sound/soc/soc-dapm.c
sound/usb/line6/pcm.c
sound/usb/line6/pod.c
sound/usb/quirks.c
tools/arch/arm64/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/sie.h
tools/gpio/gpio-event-mon.c
tools/iio/iio_generic_buffer.c
tools/include/linux/string.h
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/builtin-mem.c
tools/perf/builtin-script.c
tools/perf/util/evsel.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
tools/perf/util/jitdump.c
tools/perf/util/probe-file.c
tools/perf/util/symbol-elf.c
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

index 2a91c14..de22dae 100644
--- a/.mailmap
+++ b/.mailmap
@@ -88,6 +88,7 @@ Kay Sievers <kay.sievers@vrfy.org>
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
 Koushik <raghavendra.koushik@neterion.com>
+Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
@@ -158,6 +159,8 @@ Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
+Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
+Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
index 43f78b8..df449d7 100644
--- a/Documentation/ABI/stable/sysfs-devices
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -1,7 +1,7 @@
 # Note: This documents additional properties of any device beyond what
 # is documented in Documentation/sysfs-rules.txt
 
-What:          /sys/devices/*/of_path
+What:          /sys/devices/*/of_node
 Date:          February 2015
 Contact:       Device Tree mailing list <devicetree@vger.kernel.org>
 Description:
index c55df29..cd9c9f6 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -94,14 +94,11 @@ has a requirements for a minimum number of vectors the driver can pass a
 min_vecs argument set to this limit, and the PCI core will return -ENOSPC
 if it can't meet the minimum number of vectors.
 
-The flags argument should normally be set to 0, but can be used to pass the
-PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
-MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
-case the device does not support legacy interrupt lines.
-
-By default this function will spread the interrupts around the available
-CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
-flag.
+The flags argument is used to specify which type of interrupt can be used
+by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
+any possible kind of interrupt.  If the PCI_IRQ_AFFINITY flag is set,
+pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
 
 To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
 vectors, use the following function:
@@ -131,7 +128,7 @@ larger than the number supported by the device it will automatically be
 capped to the supported limit, so there is no need to query the number of
 vectors supported beforehand:
 
-       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0);
+       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES);
        if (nvec < 0)
                goto out_err;
 
@@ -140,7 +137,7 @@ interrupts it can request a particular number of interrupts by passing that
 number to pci_alloc_irq_vectors() function as both 'min_vecs' and
 'max_vecs' parameters:
 
-       ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0);
+       ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
        if (ret < 0)
                goto out_err;
 
@@ -148,15 +145,14 @@ The most notorious example of the request type described above is enabling
 the single MSI mode for a device.  It could be done by passing two 1s as
 'min_vecs' and 'max_vecs':
 
-       ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
        if (ret < 0)
                goto out_err;
 
 Some devices might not support using legacy line interrupts, in which case
-the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform
-can't provide MSI or MSI-X interrupts:
+the driver can specify that only MSI or MSI-X is acceptable:
 
-       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY);
+       nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
        if (nvec < 0)
                goto out_err;
 
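Pulling the calls above together, a minimal allocation/request flow might
look like this (the foo_* names are placeholders, not from the document):

        #include <linux/interrupt.h>
        #include <linux/pci.h>

        static irqreturn_t foo_irq(int irq, void *data)
        {
                return IRQ_HANDLED;
        }

        static int foo_setup_irqs(struct pci_dev *pdev)
        {
                int i, ret, nvec;

                /* Take MSI-X, MSI or a legacy line, whichever the device offers. */
                nvec = pci_alloc_irq_vectors(pdev, 1, 4, PCI_IRQ_ALL_TYPES);
                if (nvec < 0)
                        return nvec;

                for (i = 0; i < nvec; i++) {
                        /* pci_irq_vector() maps a vector index to a Linux IRQ. */
                        ret = request_irq(pci_irq_vector(pdev, i), foo_irq, 0,
                                          "foo", pdev);
                        if (ret)
                                goto err_free;
                }
                return 0;

        err_free:
                while (--i >= 0)
                        free_irq(pci_irq_vector(pdev, i), pdev);
                pci_free_irq_vectors(pdev);
                return ret;
        }
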
index 123881f..77f49dc 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -124,7 +124,6 @@ initialization with a pointer to a structure describing the driver
 
 The ID table is an array of struct pci_device_id entries ending with an
 all-zero entry.  Definitions with static const are generally preferred.
-Use of the deprecated macro DEFINE_PCI_DEVICE_TABLE should be avoided.
 
 Each entry consists of:
 
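For reference, a table following the "static const" convention described
above might look like this (the vendor/device IDs are placeholders):

        #include <linux/module.h>
        #include <linux/pci.h>

        static const struct pci_device_id foo_pci_ids[] = {
                { PCI_DEVICE(0x1234, 0x5678) }, /* placeholder IDs */
                { }                             /* all-zero terminator */
        };
        MODULE_DEVICE_TABLE(pci, foo_pci_ids);
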
index ffca443..15cdb7b 100644
--- a/Documentation/arm/CCN.txt
+++ b/Documentation/arm/CCN.txt
@@ -18,13 +18,17 @@ and config2 fields of the perf_event_attr structure. The "events"
 directory provides configuration templates for all documented
 events, that can be used with perf tool. For example "xp_valid_flit"
 is an equivalent of "type=0x8,event=0x4". Other parameters must be
-explicitly specified. For events originating from device, "node"
-defines its index. All crosspoint events require "xp" (index),
-"port" (device port number) and "vc" (virtual channel ID) and
-"dir" (direction). Watchpoints (special "event" value 0xfe) also
-require comparator values ("cmp_l" and "cmp_h") and "mask", being
-index of the comparator mask.
+explicitly specified.
 
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
 Masks are defined separately from the event description
 (due to limited number of the config values) in the "cmp_mask"
 directory, with first 8 configurable by user and additional
index 4da60b4..ccc6032 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,6 +53,7 @@ stable kernels.
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | MMU-500         | #841119,#826419 | N/A                     |
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
index 96b7aa6..106ae9c 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -131,7 +131,7 @@ pygments_style = 'sphinx'
 todo_include_todos = False
 
 primary_domain = 'C'
-highlight_language = 'C'
+highlight_language = 'guess'
 
 # -- Options for HTML output ----------------------------------------------
 
index fc64749..8d9773f 100644 (file)
@@ -103,7 +103,7 @@ Config Main Menu
        Power management options (ACPI, APM)  --->
                CPU Frequency scaling  --->
                        [*] CPU Frequency scaling
-                       <*>   CPU frequency translation statistics 
+                       [*]   CPU frequency translation statistics
                        [*]     CPU frequency translation statistics details
 
 
index bf99e2f..205593f 100644 (file)
@@ -16,6 +16,11 @@ Required properties:
 - vref-supply: The regulator supply ADC reference voltage.
 - #io-channel-cells: Should be 1, see ../iio-bindings.txt
 
+Optional properties:
+- resets: Must contain an entry for each entry in reset-names if reset
+         support is needed. See ../reset/reset.txt for details.
+- reset-names: Must include the name "saradc-apb".
+
 Example:
        saradc: saradc@2006c000 {
                compatible = "rockchip,saradc";
@@ -23,6 +28,8 @@ Example:
                interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
                clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
                clock-names = "saradc", "apb_pclk";
+               resets = <&cru SRST_SARADC>;
+               reset-names = "saradc-apb";
                #io-channel-cells = <1>;
                vref-supply = <&vcc18>;
        };
index 30d4875..fb40891 100644 (file)
@@ -6,9 +6,13 @@ Required properties:
 - reg: addresses and length of the register sets for the device, must be 6
   pairs of register addresses and lengths
 - interrupts: interrupts for the devices, must be two interrupts
+- #address-cells: must be 1, see dsa/dsa.txt
+- #size-cells: must be 0, see dsa/dsa.txt
+
+Deprecated binding required properties:
+
 - dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt
 - dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt
-- #size-cells: must be 0
 - #address-cells: must be 2, see dsa/dsa.txt
 
 Subnodes:
@@ -39,6 +43,45 @@ Optional properties:
 
 Example:
 
+switch_top@f0b00000 {
+       compatible = "simple-bus";
+       #size-cells = <1>;
+       #address-cells = <1>;
+       ranges = <0 0xf0b00000 0x40804>;
+
+       ethernet_switch@0 {
+               compatible = "brcm,bcm7445-switch-v4.0";
+               #size-cells = <0>;
+               #address-cells = <1>;
+               reg = <0x0 0x40000
+                       0x40000 0x110
+                       0x40340 0x30
+                       0x40380 0x30
+                       0x40400 0x34
+                       0x40600 0x208>;
+               reg-names = "core", "reg", "intrl2_0", "intrl2_1",
+                           "fcb", "acb";
+               interrupts = <0 0x18 0
+                               0 0x19 0>;
+               brcm,num-gphy = <1>;
+               brcm,num-rgmii-ports = <2>;
+               brcm,fcb-pause-override;
+               brcm,acb-packets-inflight;
+
+               ports {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       port@0 {
+                               label = "gphy";
+                               reg = <0>;
+                       };
+               };
+       };
+};
+
+Example using the old DSA DeviceTree binding:
+
 switch_top@f0b00000 {
        compatible = "simple-bus";
        #size-cells = <1>;
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
new file mode 100644 (file)
index 0000000..9c67ee4
--- /dev/null
@@ -0,0 +1,89 @@
+* Qualcomm Atheros QCA8xxx switch family
+
+Required properties:
+
+- compatible: should be "qca,qca8337"
+- #size-cells: must be 0
+- #address-cells: must be 1
+
+Subnodes:
+
+The integrated switch subnode should be specified according to the binding
+described in dsa/dsa.txt. As the QCA8K switches do not have an N:N mapping of
+port and PHY id, each subnode describing a port needs to have a valid phandle
+referencing the internal PHY connected to it. The CPU port of this switch is
+always port 0.
+
+Example:
+
+
+       &mdio0 {
+               phy_port1: phy@0 {
+                       reg = <0>;
+               };
+
+               phy_port2: phy@1 {
+                       reg = <1>;
+               };
+
+               phy_port3: phy@2 {
+                       reg = <2>;
+               };
+
+               phy_port4: phy@3 {
+                       reg = <3>;
+               };
+
+               phy_port5: phy@4 {
+                       reg = <4>;
+               };
+
+               switch0@0 {
+                       compatible = "qca,qca8337";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       reg = <0>;
+
+                       ports {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               port@0 {
+                                       reg = <0>;
+                                       label = "cpu";
+                                       ethernet = <&gmac1>;
+                                       phy-mode = "rgmii";
+                               };
+
+                               port@1 {
+                                       reg = <1>;
+                                       label = "lan1";
+                                       phy-handle = <&phy_port1>;
+                               };
+
+                               port@2 {
+                                       reg = <2>;
+                                       label = "lan2";
+                                       phy-handle = <&phy_port2>;
+                               };
+
+                               port@3 {
+                                       reg = <3>;
+                                       label = "lan3";
+                                       phy-handle = <&phy_port3>;
+                               };
+
+                               port@4 {
+                                       reg = <4>;
+                                       label = "lan4";
+                                       phy-handle = <&phy_port4>;
+                               };
+
+                               port@5 {
+                                       reg = <5>;
+                                       label = "wan";
+                                       phy-handle = <&phy_port5>;
+                               };
+                       };
+               };
+       };
index b5a42df..1506e94 100644 (file)
@@ -21,6 +21,7 @@ Required properties:
 - clock-names: Tuple listing input clock names.
        Required elements: 'pclk', 'hclk'
        Optional elements: 'tx_clk'
+       Optional elements: 'rx_clk' (applies to "cdns,zynqmp-gem")
 - clocks: Phandles to input clocks.
 
 Optional properties for PHY child node:
diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt
new file mode 100644 (file)
index 0000000..346e6c7
--- /dev/null
@@ -0,0 +1,111 @@
+Qualcomm Technologies EMAC Gigabit Ethernet Controller
+
+This network controller consists of two devices: a MAC and an SGMII
+internal PHY.  Each device is represented by a device tree node.  A phandle
+connects the MAC node to its corresponding internal phy node.  Another
+phandle points to the external PHY node.
+
+Required properties:
+
+MAC node:
+- compatible : Should be "qcom,fsm9900-emac".
+- reg : Offset and length of the register regions for the device
+- interrupts : Interrupt number used by this controller
+- mac-address : The 6-byte MAC address. If present, it is the default
+       MAC address.
+- internal-phy : phandle to the internal PHY node
+- phy-handle : phandle to the external PHY node
+
+Internal PHY node:
+- compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii".
+- reg : Offset and length of the register region(s) for the device
+- interrupts : Interrupt number used by this controller
+
+The external phy child node:
+- reg : The phy address
+
+Example:
+
+FSM9900:
+
+soc {
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       emac0: ethernet@feb20000 {
+               compatible = "qcom,fsm9900-emac";
+               reg = <0xfeb20000 0x10000>,
+                     <0xfeb36000 0x1000>;
+               interrupts = <76>;
+
+               clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>,
+                       <&gcc 6>, <&gcc 7>;
+               clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk",
+                       "mdio_clk", "tx_clk", "rx_clk", "sys_clk";
+
+               internal-phy = <&emac_sgmii>;
+
+               phy-handle = <&phy0>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy0: ethernet-phy@0 {
+                       reg = <0>;
+               };
+
+               pinctrl-names = "default";
+               pinctrl-0 = <&mdio_pins_a>;
+       };
+
+       emac_sgmii: ethernet@feb38000 {
+               compatible = "qcom,fsm9900-emac-sgmii";
+               reg = <0xfeb38000 0x1000>;
+               interrupts = <80>;
+       };
+
+       tlmm: pinctrl@fd510000 {
+               compatible = "qcom,fsm9900-pinctrl";
+
+               mdio_pins_a: mdio {
+                       state {
+                               pins = "gpio123", "gpio124";
+                               function = "mdio";
+                       };
+               };
+       };
+
+
+QDF2432:
+
+soc {
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       emac0: ethernet@38800000 {
+               compatible = "qcom,fsm9900-emac";
+               reg = <0x0 0x38800000 0x0 0x10000>,
+                     <0x0 0x38816000 0x0 0x1000>;
+               interrupts = <0 256 4>;
+
+               clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>,
+                        <&gcc 6>, <&gcc 7>;
+               clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk",
+                       "mdio_clk", "tx_clk", "rx_clk", "sys_clk";
+
+               internal-phy = <&emac_sgmii>;
+
+               phy-handle = <&phy0>;
+
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy0: ethernet-phy@4 {
+                       reg = <4>;
+               };
+       };
+
+       emac_sgmii: ethernet@410400 {
+               compatible = "qcom,qdf2432-emac-sgmii";
+               reg = <0x0 0x00410400 0x0 0xc00>, /* Base address */
+                     <0x0 0x00410000 0x0 0x400>; /* Per-lane digital */
+               interrupts = <0 254 1>;
+       };
index cccd945..95383c5 100644 (file)
@@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC)
 The device node has following properties.
 
 Required properties:
- - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac",
-                             "rockchip,rk3368-gmac"
+ - compatible: should be "rockchip,<name>-gmac"
+   "rockchip,rk3228-gmac": found on RK322x SoCs
+   "rockchip,rk3288-gmac": found on RK3288 SoCs
+   "rockchip,rk3366-gmac": found on RK3366 SoCs
+   "rockchip,rk3368-gmac": found on RK3368 SoCs
+   "rockchip,rk3399-gmac": found on RK3399 SoCs
  - reg: addresses and length of the register sets for the device.
  - interrupts: Should contain the GMAC interrupts.
  - interrupt-names: Should contain the interrupt names "macirq".
index 3fed3c1..16c3a95 100644 (file)
@@ -3,9 +3,11 @@
 Required properties:
 - compatible : Should be "smsc,lan<model>", "smsc,lan9115"
 - reg : Address and length of the io space for SMSC LAN
-- interrupts : Should contain SMSC LAN interrupt line
-- interrupt-parent : Should be the phandle for the interrupt controller
-  that services interrupts for this device
+- interrupts : one or two interrupt specifiers
+  - The first interrupt is the SMSC LAN interrupt line
+  - The second interrupt (if present) is the PME (power
+    management event) interrupt that is able to wake up the host
+    system with a 50ms pulse on network activity
 - phy-mode : See ethernet.txt file in the same directory
 
 Optional properties:
@@ -21,6 +23,10 @@ Optional properties:
   external PHY
 - smsc,save-mac-address : Indicates that mac address needs to be saved
   before resetting the controller
+- reset-gpios : a GPIO line connected to the RESET (active low) signal
+  of the device. On many systems this is wired high so the device goes
+  out of reset at power-on, but if it is under program control, this
+  optional GPIO can be used to bring the device in and out of reset.
 
 Examples:
 
@@ -29,7 +35,8 @@ lan9220@f4000000 {
        reg = <0xf4000000 0x2000000>;
        phy-mode = "mii";
        interrupt-parent = <&gpio1>;
-       interrupts = <31>;
+       interrupts = <31>, <32>;
+       reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
        reg-io-width = <4>;
        smsc,irq-push-pull;
 };
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
new file mode 100644 (file)
index 0000000..c35afb7
--- /dev/null
@@ -0,0 +1,32 @@
+STMicroelectronics STM32 / MCU DWMAC glue layer controller
+
+This file documents the platform glue layer for stmmac.
+Please see stmmac.txt for the other unchanged properties.
+
+The device node has following properties.
+
+Required properties:
+- compatible:  Should be "st,stm32-dwmac" to select glue, and
+              "snps,dwmac-3.50a" to select IP version.
+- clocks: Must contain a phandle for each entry in clock-names.
+- clock-names: Should be "stmmaceth" for the host clock.
+              Should be "mac-clk-tx" for the MAC TX clock.
+              Should be "mac-clk-rx" for the MAC RX clock.
+- st,syscon : Should be a phandle/offset pair: the phandle to the syscon node
+             which encompasses the glue register, and the offset of the control register.
+Example:
+
+       ethernet@40028000 {
+               compatible = "st,stm32-dwmac", "snps,dwmac-3.50a";
+               status = "disabled";
+               reg = <0x40028000 0x8000>;
+               reg-names = "stmmaceth";
+               interrupts = <0 61 0>, <0 62 0>;
+               interrupt-names = "macirq", "eth_wake_irq";
+               clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
+               clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>;
+               st,syscon = <&syscfg 0x4>;
+               snps,pbl = <8>;
+               snps,mixed-burst;
+               dma-ranges;
+       };
index f5561ac..936ab5b 100644 (file)
@@ -42,9 +42,6 @@ Optional properties:
 - auto-flow-control: one way to enable automatic flow control support. The
   driver is allowed to detect support for the capability even without this
   property.
-- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
-  line respectively. It will use specified GPIO instead of the peripheral
-  function pin for the UART feature. If unsure, don't specify this property.
 
 Note:
 * fsl,ns16550:
@@ -66,19 +63,3 @@ Example:
                interrupts = <10>;
                reg-shift = <2>;
        };
-
-Example for OMAP UART using GPIO-based modem control signals:
-
-       uart4: serial@49042000 {
-               compatible = "ti,omap3-uart";
-               reg = <0x49042000 0x400>;
-               interrupts = <80>;
-               ti,hwmods = "uart4";
-               clock-frequency = <48000000>;
-               cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>;
-               rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>;
-               dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
-               dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
-               dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
-               rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
-       };
index 6f6c2f8..0741dff 100644 (file)
@@ -8,8 +8,6 @@ Required properties:
 - interrupts: Interrupt number for McPDM
 - interrupt-parent: The parent interrupt controller
 - ti,hwmods: Name of the hwmod associated to the McPDM
-- clocks:  phandle for the pdmclk provider, likely <&twl6040>
-- clock-names: Must be "pdmclk"
 
 Example:
 
@@ -21,11 +19,3 @@ mcpdm: mcpdm@40132000 {
        interrupt-parent = <&gic>;
        ti,hwmods = "mcpdm";
 };
-
-In board DTS file the pdmclk needs to be added:
-
-&mcpdm {
-       clocks = <&twl6040>;
-       clock-names = "pdmclk";
-       status = "okay";
-};
index 41b817f..88b6ea1 100644 (file)
@@ -62,7 +62,7 @@ For more examples of cooling devices, refer to the example sections below.
 Required properties:
 - #cooling-cells:      Used to provide cooling device specific information
   Type: unsigned       while referring to it. Must be at least 2, in order
-  Size: one cell       to specify minimum and maximum cooling state used
+  Size: one cell       to specify minimum and maximum cooling state used
                        in the reference. The first cell is the minimum
                        cooling state requested and the second cell is
                        the maximum cooling state requested in the reference.
@@ -119,7 +119,7 @@ Required properties:
 Optional property:
 - contribution:                The cooling contribution to the thermal zone of the
   Type: unsigned       referred cooling device at the referred trip point.
-  Size: one cell       The contribution is a ratio of the sum
+  Size: one cell       The contribution is a ratio of the sum
                        of all cooling contributions within a thermal zone.
 
 Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
@@ -145,7 +145,7 @@ Required properties:
   Size: one cell
 
 - thermal-sensors:     A list of thermal sensor phandles and sensor specifier
-  Type: list of        used while monitoring the thermal zone.
+  Type: list of                used while monitoring the thermal zone.
   phandles + sensor
   specifier
 
@@ -473,7 +473,7 @@ thermal-zones {
                                  <&adc>;       /* pcb north */
 
                /* hotspot = 100 * bandgap - 120 * adc + 484 */
-               coefficients =          <100    -120    484>;
+               coefficients =          <100    -120    484>;
 
                trips {
                        ...
@@ -502,7 +502,7 @@ from the ADC sensor. The binding would be then:
         thermal-sensors =  <&adc>;
 
                /* hotspot = 1 * adc + 6000 */
-       coefficients =          <1      6000>;
+       coefficients =          <1      6000>;
 
 (d) - Board thermal
 
index d6259c7..bcbf971 100644 (file)
@@ -183,12 +183,10 @@ The copy_up operation essentially creates a new, identical file and
 moves it over to the old name.  The new file may be on a different
 filesystem, so both st_dev and st_ino of the file may change.
 
-Any open files referring to this inode will access the old data and
-metadata.  Similarly any file locks obtained before copy_up will not
-apply to the copied up file.
+Any open files referring to this inode will access the old data.
 
-On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and
-fsetxattr(2) will fail with EROFS.
+Any file locks (and leases) obtained before copy_up will not apply
+to the copied up file.
 
 If a file with multiple hard links is copied up, then this will
 "break" the link.  Changes will not be propagated to other names
index 2a1bf69..8c10a91 100644 (file)
@@ -19,5 +19,5 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and
 implemented in this driver.
 
 Specification of the chip can be found here:
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
index 80807ad..7e2a228 100644 (file)
@@ -145,6 +145,11 @@ If you want to add slave support to the bus driver:
 
 * Catch the slave interrupts and send appropriate i2c_slave_events to the backend.
 
+Note that most hardware supports being master _and_ slave on the same bus. So,
+if you extend a bus driver, please make sure that the driver supports that as
+well. In almost all cases, slave support does not need to disable the master
+functionality.
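+
+As a hedged sketch of the backend side of this interface (the callback
+name and its byte-handling policy are hypothetical):
+
+	static int backend_slave_cb(struct i2c_client *client,
+				    enum i2c_slave_event event, u8 *val)
+	{
+		switch (event) {
+		case I2C_SLAVE_WRITE_RECEIVED:
+			/* consume the byte the master just wrote in *val */
+			break;
+		case I2C_SLAVE_READ_REQUESTED:
+		case I2C_SLAVE_READ_PROCESSED:
+			/* supply the next byte to transmit in *val */
+			*val = 0;
+			break;
+		default:
+			break;
+		}
+		return 0;
+	}
+
+	/* in the backend's probe routine: */
+	ret = i2c_slave_register(client, backend_slave_cb);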
+
 Check the i2c-rcar driver as an example.
 
 
index c4eb504..391decc 100644 (file)
@@ -366,8 +366,6 @@ Domain`_ references.
 Cross-referencing from reStructuredText
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. highlight:: none
-
 To cross-reference the functions and types defined in the kernel-doc comments
 from reStructuredText documents, please use the `Sphinx C Domain`_
 references. For example::
@@ -390,8 +388,6 @@ For further details, please refer to the `Sphinx C Domain`_ documentation.
 Function documentation
 ----------------------
 
-.. highlight:: c
-
 The general format of a function and function-like macro kernel-doc comment is::
 
   /**
@@ -572,8 +568,6 @@ DocBook XML [DEPRECATED]
 Converting DocBook to Sphinx
 ----------------------------
 
-.. highlight:: none
-
 Over time, we expect all of the documents under ``Documentation/DocBook`` to be
 converted to Sphinx and reStructuredText. For most DocBook XML documents, a good
 enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script,
index 46c030a..a4f4d69 100644 (file)
@@ -3032,6 +3032,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                PAGE_SIZE is used as alignment.
                                PCI-PCI bridge can be specified, if resource
                                windows need to be expanded.
+                               To specify the alignment for several
+                               instances of a device, the PCI vendor,
+                               device, subvendor, and subdevice may be
+                               specified, e.g., 4096@pci:8086:9c22:103c:198f
                ecrc=           Enable/disable PCIe ECRC (transaction layer
                                end-to-end CRC checking).
                                bios: Use BIOS/firmware settings. This is the
index 9d05ed7..6d6c07c 100644 (file)
@@ -227,9 +227,9 @@ to address individual switches in the tree.
 
 dsa_switch: structure describing a switch device in the tree, referencing a
 dsa_switch_tree as a backpointer, slave network devices, master network device,
-and a reference to the backing dsa_switch_driver
+and a reference to the backing dsa_switch_ops
 
-dsa_switch_driver: structure referencing function pointers, see below for a full
+dsa_switch_ops: structure referencing function pointers, see below for a full
 description.
 
 Design limitations
@@ -357,10 +357,10 @@ regular HWMON devices in /sys/class/hwmon/.
 Driver development
 ==================
 
-DSA switch drivers need to implement a dsa_switch_driver structure which will
+DSA switch drivers need to implement a dsa_switch_ops structure which will
 contain the various members described below.
 
-register_switch_driver() registers this dsa_switch_driver in its internal list
+register_switch_driver() registers this dsa_switch_ops in its internal list
 of drivers to probe for. unregister_switch_driver() does the exact opposite.
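+
+As a hedged sketch of the registration boilerplate (the driver name is
+hypothetical and the operations themselves are elided, see the list of
+members described below):
+
+	static struct dsa_switch_ops mysw_switch_ops = {
+		/* .setup and friends, as described below */
+	};
+
+	static int __init mysw_init(void)
+	{
+		register_switch_driver(&mysw_switch_ops);
+		return 0;
+	}
+	module_init(mysw_init);
+
+	static void __exit mysw_exit(void)
+	{
+		unregister_switch_driver(&mysw_switch_ops);
+	}
+	module_exit(mysw_exit);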
 
 Unless requested differently by setting the priv_size member accordingly, DSA
@@ -379,7 +379,7 @@ Switch configuration
   buses, return a non-NULL string
 
 - setup: setup function for the switch, this function is responsible for setting
-  up the dsa_switch_driver private structure with all it needs: register maps,
+  up the dsa_switch_ops private structure with all it needs: register maps,
   interrupts, mutexes, locks etc.. This function is also expected to properly
   configure the switch to separate all network interfaces from each other, that
   is, they should be isolated by the switch hardware itself, typically by creating
@@ -584,28 +584,31 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device.
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and FDB info.
 
-TODO
-====
-
-The platform device problem
----------------------------
-DSA is currently implemented as a platform device driver which is far from ideal
-as was discussed in this thread:
+- port_mdb_prepare: bridge layer function invoked when the bridge prepares the
+  installation of a multicast database entry. If the operation is not supported,
+  this function should return -EOPNOTSUPP to inform the bridge code to fall back
+  to a software implementation. No hardware setup must be done in this function.
+  See port_fdb_add for the hardware setup and details.
 
-http://permalink.gmane.org/gmane.linux.network/329848
+- port_mdb_add: bridge layer function invoked when the bridge wants to install
+  a multicast database entry; the switch hardware should be programmed with the
+  specified address in the specified VLAN ID in the forwarding database
+  associated with this VLAN ID.
 
-This basically prevents the device driver model to be properly used and applied,
-and support non-MDIO, non-MMIO Ethernet connected switches.
-
-Another problem with the platform device driver approach is that it prevents the
-use of a modular switch drivers build due to a circular dependency, illustrated
-here:
+Note: VLAN ID 0 corresponds to the port private database, which, in the context
+of DSA, would be its port-based VLAN, used by the associated bridge device.
 
-http://comments.gmane.org/gmane.linux.network/345803
+- port_mdb_del: bridge layer function invoked when the bridge wants to remove a
+  multicast database entry; the switch hardware should be programmed to delete
+  the specified MAC address from the specified VLAN ID if it was mapped into
+  this port forwarding database.
 
-Attempts of reworking this has been done here:
+- port_mdb_dump: bridge layer function invoked with a switchdev callback
+  function that the driver has to call for each MAC address known to be behind
+  the given port. A switchdev object is used to carry the VID and MDB info.
 
-https://lwn.net/Articles/643149/
+TODO
+====
 
 Making SWITCHDEV and DSA converge towards an unified codebase
 -------------------------------------------------------------
index 9ae9293..3db8c67 100644 (file)
@@ -575,32 +575,33 @@ tcp_syncookies - BOOLEAN
        unconditionally generation of syncookies.
 
 tcp_fastopen - INTEGER
-       Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
-       in the opening SYN packet. To use this feature, the client application
-       must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
-       connect() to perform a TCP handshake automatically.
+       Enable TCP Fast Open (RFC7413) to send and accept data in the opening
+       SYN packet.
 
-       The values (bitmap) are
-       1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN.
-       2: Enables TCP Fast Open on the server side, i.e., allowing data in
-          a SYN packet to be accepted and passed to the application before
-          3-way hand shake finishes.
-       4: Send data in the opening SYN regardless of cookie availability and
-          without a cookie option.
-       0x100: Accept SYN data w/o validating the cookie.
-       0x200: Accept data-in-SYN w/o any cookie option present.
-       0x400/0x800: Enable Fast Open on all listeners regardless of the
-          TCP_FASTOPEN socket option. The two different flags designate two
-          different ways of setting max_qlen without the TCP_FASTOPEN socket
-          option.
+       The client support is enabled by flag 0x1 (on by default). To send
+       data in the SYN, the client must then use sendmsg() or sendto()
+       with the MSG_FASTOPEN flag rather than connect().
 
-       Default: 1
+       The server support is enabled by flag 0x2 (off by default). Then
+       either enable it for all listeners with another flag (0x400) or
+       enable individual listeners via the TCP_FASTOPEN socket option,
+       with the option value being the length of the syn-data backlog.
 
-       Note that the client & server side Fast Open flags (1 and 2
-       respectively) must be also enabled before the rest of flags can take
-       effect.
+       The values (bitmap) are
+         0x1: (client) enables sending data in the opening SYN on the client.
+         0x2: (server) enables the server support, i.e., allowing data in
+                       a SYN packet to be accepted and passed to the
+                       application before 3-way handshake finishes.
+         0x4: (client) send data in the opening SYN regardless of cookie
+                       availability and without a cookie option.
+       0x200: (server) accept data-in-SYN w/o any cookie option present.
+       0x400: (server) enable all listeners to support Fast Open by
+                       default without explicit TCP_FASTOPEN socket option.
+
+       Default: 0x1
 
-       See include/net/tcp.h and the code for more details.
+       Note that additional client or server features are only
+       effective if the basic support (0x1 and 0x2) is enabled respectively.
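+
+       As a hedged userspace sketch of the client side (socket address
+       setup elided; the MSG_FASTOPEN flag replaces the usual
+       connect()+send() sequence):
+
+               int fd = socket(AF_INET, SOCK_STREAM, 0);
+
+               /* data goes out in the SYN once a valid cookie is cached */
+               sendto(fd, buf, len, MSG_FASTOPEN,
+                      (struct sockaddr *)&daddr, sizeof(daddr));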
 
 tcp_syn_retries - INTEGER
        Number of times initial SYNs for an active TCP connection attempt
index 70c926a..1b63bbc 100644 (file)
@@ -725,7 +725,8 @@ The kernel interface functions are as follows:
 
  (*) End a client call.
 
-       void rxrpc_kernel_end_call(struct rxrpc_call *call);
+       void rxrpc_kernel_end_call(struct socket *sock,
+                                  struct rxrpc_call *call);
 
      This is used to end a previously begun call.  The user_call_ID is expunged
      from AF_RXRPC's knowledge and will not be seen again in association with
@@ -733,7 +734,9 @@ The kernel interface functions are as follows:
 
  (*) Send data through a call.
 
-       int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+       int rxrpc_kernel_send_data(struct socket *sock,
+                                  struct rxrpc_call *call,
+                                  struct msghdr *msg,
                                   size_t len);
 
      This is used to supply either the request part of a client call or the
@@ -745,9 +748,42 @@ The kernel interface functions are as follows:
      The msg must not specify a destination address, control data or any flags
      other than MSG_MORE.  len is the total amount of data to transmit.
 
+ (*) Receive data from a call.
+
+       int rxrpc_kernel_recv_data(struct socket *sock,
+                                  struct rxrpc_call *call,
+                                  void *buf,
+                                  size_t size,
+                                  size_t *_offset,
+                                  bool want_more,
+                                  u32 *_abort)
+
+      This is used to receive data from either the reply part of a client call
+      or the request part of a service call.  buf and size specify how much
+      data is desired and where to store it.  *_offset is added on to buf and
+      subtracted from size internally; the amount copied into the buffer is
+      added to *_offset before returning.
+
+      want_more should be true if further data will be required after this is
+      satisfied and false if this is the last item of the receive phase.
+
+      There are three normal returns: 0 if the buffer was filled and want_more
+      was true; 1 if the buffer was filled, the last DATA packet has been
+      emptied and want_more was false; and -EAGAIN if the function needs to be
+      called again.
+
+      If the last DATA packet is processed but the buffer contains less than
+      the amount requested, EBADMSG is returned.  If want_more wasn't set, but
+      more data was available, EMSGSIZE is returned.
+
+      If a remote ABORT is detected, the abort code received will be stored in
+      *_abort and ECONNABORTED will be returned.
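+
+      As a hedged sketch of how a caller might consume these return codes
+      (variable setup elided):
+
+	ret = rxrpc_kernel_recv_data(sock, call, buf, sizeof(buf),
+				     &offset, want_more, &abort_code);
+	switch (ret) {
+	case 0:		/* buffer filled, more data expected */
+	case 1:		/* buffer filled, last DATA packet consumed */
+		break;
+	case -EAGAIN:	/* nothing available yet, call again later */
+		break;
+	case -ECONNABORTED:
+		/* abort_code now holds the remote abort code */
+		break;
+	}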
+
  (*) Abort a call.
 
-       void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);
+       void rxrpc_kernel_abort_call(struct socket *sock,
+                                    struct rxrpc_call *call,
+                                    u32 abort_code);
 
      This is used to abort a call if it's still in an abortable state.  The
      abort code specified will be placed in the ABORT message sent.
@@ -820,47 +856,6 @@ The kernel interface functions are as follows:
      Other errors may be returned if the call had been aborted (-ECONNABORTED)
      or had timed out (-ETIME).
 
- (*) Record the delivery of a data message.
-
-       void rxrpc_kernel_data_consumed(struct rxrpc_call *call,
-                                       struct sk_buff *skb);
-
-     This is used to record a data message as having been consumed and to
-     update the ACK state for the call.  The message must still be passed to
-     rxrpc_kernel_free_skb() for disposal by the caller.
-
- (*) Free a message.
-
-       void rxrpc_kernel_free_skb(struct sk_buff *skb);
-
-     This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
-     socket.
-
- (*) Determine if a data message is the last one on a call.
-
-       bool rxrpc_kernel_is_data_last(struct sk_buff *skb);
-
-     This is used to determine if a socket buffer holds the last data message
-     to be received for a call (true will be returned if it does, false
-     if not).
-
-     The data message will be part of the reply on a client call and the
-     request on an incoming call.  In the latter case there will be more
-     messages, but in the former case there will not.
-
- (*) Get the abort code from an abort message.
-
-       u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);
-
-     This is used to extract the abort code from a remote abort message.
-
- (*) Get the error number from a local or network error message.
-
-       int rxrpc_kernel_get_error_number(struct sk_buff *skb);
-
-     This is used to extract the error number from a message indicating either
-     a local error occurred or a network error occurred.
-
  (*) Allocate a null key for doing anonymous security.
 
        struct key *rxrpc_get_null_key(const char *keyname);
@@ -868,6 +863,13 @@ The kernel interface functions are as follows:
      This is used to allocate a null RxRPC key that can be used to indicate
      anonymous security for a particular domain.
 
+ (*) Get the peer address of a call.
+
+       void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+                                  struct sockaddr_rxrpc *_srx);
+
+     This is used to find the remote peer address of a call.
+
 
 =======================
 CONFIGURABLE PARAMETERS
index 31c3911..44235e8 100644 (file)
@@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver.  The
 bridge should not reflood the packet to the same ports the device flooded,
 otherwise there will be duplicate packets on the wire.
 
-To avoid duplicate packets, the device/driver should mark a packet as already
-forwarded using skb->offload_fwd_mark.  The same mark is set on the device
-ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
-is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
-will drop the skb right before transmit on the egress port, with the
-understanding that the device already forwarded the packet on same egress port.
-The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
-for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
-a group ifindex.
+To avoid duplicate packets, the switch driver should mark a packet as already
+forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark
+the skb using the ingress bridge port's mark and prevent it from being forwarded
+through any bridge port with the same mark.
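+
+As a hedged sketch of the driver side (rx path, surrounding code elided):
+
+	/* packet was already forwarded in hardware; tell the bridge */
+	skb->offload_fwd_mark = 1;
+	netif_receive_skb(skb);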
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
index b96098c..708f87f 100644 (file)
@@ -164,7 +164,32 @@ load n/2 modules more and try again.
 Again, if you find the offending module(s), it(they) must be unloaded every time
 before hibernation, and please report the problem with it(them).
 
-c) Advanced debugging
+c) Using the "test_resume" hibernation option
+
+/sys/power/disk generally tells the kernel what to do after creating a
+hibernation image.  One of the available options is "test_resume" which
+causes the just created image to be used for immediate restoration.  Namely,
+after doing:
+
+# echo test_resume > /sys/power/disk
+# echo disk > /sys/power/state
+
+a hibernation image will be created and a resume from it will be triggered
+immediately without involving the platform firmware in any way.
+
+That test can be used to check if failures to resume from hibernation are
+related to bad interactions with the platform firmware.  That is, if the above
+works every time, but resume from actual hibernation does not work or is
+unreliable, the platform firmware may be responsible for the failures.
+
+On architectures and platforms that support using different kernels to restore
+hibernation images (that is, the kernel used to read the image from storage and
+load it into memory is different from the one included in the image) or support
+kernel address space randomization, it can also be used to check if failures
+to resume may be related to the differences between the restore and image
+kernels.
+
+d) Advanced debugging
 
 In case that hibernation does not work on your system even in the minimal
 configuration and compiling more drivers as modules is not practical or some
index f1f0f59..974916f 100644 (file)
@@ -1,75 +1,76 @@
-Power Management Interface
-
-
-The power management subsystem provides a unified sysfs interface to 
-userspace, regardless of what architecture or platform one is
-running. The interface exists in /sys/power/ directory (assuming sysfs
-is mounted at /sys). 
-
-/sys/power/state controls system power state. Reading from this file
-returns what states are supported, which is hard-coded to 'freeze',
-'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk'
-(Suspend-to-Disk). 
-
-Writing to this file one of those strings causes the system to
-transition into that state. Please see the file
-Documentation/power/states.txt for a description of each of those
-states.
-
-
-/sys/power/disk controls the operating mode of the suspend-to-disk
-mechanism. Suspend-to-disk can be handled in several ways. We have a
-few options for putting the system to sleep - using the platform driver
-(e.g. ACPI or other suspend_ops), powering off the system or rebooting the
-system (for testing).
-
-Additionally, /sys/power/disk can be used to turn on one of the two testing
-modes of the suspend-to-disk mechanism: 'testproc' or 'test'.  If the
-suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
-/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
-tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs.  If it is
-in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
-to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
-for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs.  Then,
-we are able to look in the log messages and work out, for example, which code
-is being slow and which device drivers are misbehaving.
-
-Reading from this file will display all supported modes and the currently
-selected one in brackets, for example
-
-       [shutdown] reboot test testproc
-
-Writing to this file will accept one of
-
-       'platform' (only if the platform supports it)
-       'shutdown'
-       'reboot'
-       'testproc'
-       'test'
-
-/sys/power/image_size controls the size of the image created by
-the suspend-to-disk mechanism.  It can be written a string
-representing a non-negative integer that will be used as an upper
-limit of the image size, in bytes.  The suspend-to-disk mechanism will
-do its best to ensure the image size will not exceed that number.  However,
-if this turns out to be impossible, it will try to suspend anyway using the
-smallest image possible.  In particular, if "0" is written to this file, the
-suspend image will be as small as possible.
-
-Reading from this file will display the current image size limit, which
-is set to 2/5 of available RAM by default.
-
-/sys/power/pm_trace controls the code which saves the last PM event point in
-the RTC across reboots, so that you can debug a machine that just hangs
-during suspend (or more commonly, during resume).  Namely, the RTC is only
-used to save the last PM event point if this file contains '1'.  Initially it
-contains '0' which may be changed to '1' by writing a string representing a
-nonzero integer into it.
-
-To use this debugging feature you should attempt to suspend the machine, then
-reboot it and run
-
-       dmesg -s 1000000 | grep 'hash matches'
-
-CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
-set to a random invalid time after a resume.
+Power Management Interface for System Sleep
+
+Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+The power management subsystem provides userspace with a unified sysfs interface
+for system sleep regardless of the underlying system architecture or platform.
+The interface is located in the /sys/power/ directory (assuming that sysfs is
+mounted at /sys).
+
+/sys/power/state is the system sleep state control file.
+
+Reading from it returns a list of supported sleep states, encoded as:
+
+'freeze' (Suspend-to-Idle)
+'standby' (Power-On Suspend)
+'mem' (Suspend-to-RAM)
+'disk' (Suspend-to-Disk)
+
+Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
+too, as long as the kernel has been configured to support hibernation at all
+(i.e. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
+for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
+platform.
+
+If one of the strings listed in /sys/power/state is written to it, the system
+will attempt to transition into the corresponding sleep state.  Refer to
+Documentation/power/states.txt for a description of each of those states.
+
+/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
+Specifically, it tells the kernel what to do after creating a hibernation image.
+
+Reading from it returns a list of supported options encoded as:
+
+'platform' (put the system into sleep using a platform-provided method)
+'shutdown' (shut the system down)
+'reboot' (reboot the system)
+'suspend' (trigger a Suspend-to-RAM transition)
+'test_resume' (resume-after-hibernation test mode)
+
+The currently selected option is printed in square brackets.
+
+The 'platform' option is only available if the platform provides a special
+mechanism to put the system to sleep after creating a hibernation image (ACPI
+does that, for example).  The 'suspend' option is available if Suspend-to-RAM
+is supported.  Refer to Documentation/power/basic_pm_debugging.txt for the
+description of the 'test_resume' option.
+
+To select an option, write the string representing it to /sys/power/disk.
+
+/sys/power/image_size controls the size of hibernation images.
+
+It can be written a string representing a non-negative integer that will be
+used as a best-effort upper limit of the image size, in bytes.  The hibernation
+core will do its best to ensure that the image size will not exceed that number.
+However, if that turns out to be impossible to achieve, a hibernation image will
+still be created and its size will be as small as possible.  In particular,
+writing '0' to this file will enforce hibernation images to be as small as
+possible.
+
+Reading from this file returns the current image size limit, which is set to
+around 2/5 of available RAM by default.
+
+/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
+or resume event point in the RTC across reboots.
+
+It helps to debug hard lockups or reboots due to device driver failures that
+occur during system suspend or resume (resume failures being the more common
+case).
+
+If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
+event point in turn will be stored in the RTC memory (overwriting the actual
+RTC information), so it will survive a system crash if one occurs right after
+storing it and it can be used later to identify the driver that caused the crash
+to happen (see Documentation/power/s2ram.txt for more information).
+
+Initially it contains '0' which may be changed to '1' by writing a string
+representing a nonzero integer into it.
index ba0a2a4..e32fdbb 100644 (file)
@@ -167,6 +167,8 @@ signal will be rolled back anyway.
 For signals taken in non-TM or suspended mode, we use the
 normal/non-checkpointed stack pointer.
 
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
 
 Failure cause codes used by kernel
 ==================================
index 6e491a6..a53f786 100644 (file)
@@ -80,6 +80,10 @@ functionality of their platform when planning to use this driver:
 
 III. Module parameters
 
+- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000).
+        This parameter sets the maximum completion wait time for SYNC mode DMA
+        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
+
 - 'dbg_level' - This parameter allows to control amount of debug information
         generated by this device driver. This parameter is formed by set of
         bit masks that correspond to the specific functional blocks.
index 3a2ac4b..e88461c 100644 (file)
     caption a.headerlink { opacity: 0; }
     caption a.headerlink:hover { opacity: 1; }
 
-    /* inline literal: drop the borderbox and red color */
+    /* inline literal: drop the borderbox, padding and red color */
 
     code, .rst-content tt, .rst-content code {
         color: inherit;
         border: none;
+        padding: unset;
         background: inherit;
         font-size: 85%;
     }
index e902b63..ce80b36 100644 (file)
@@ -807,6 +807,7 @@ M:  Laura Abbott <labbott@redhat.com>
 M:     Sumit Semwal <sumit.semwal@linaro.org>
 L:     devel@driverdev.osuosl.org
 S:     Supported
+F:     Documentation/devicetree/bindings/staging/ion/
 F:     drivers/staging/android/ion
 F:     drivers/staging/android/uapi/ion.h
 F:     drivers/staging/android/uapi/ion_test.h
@@ -890,6 +891,15 @@ S: Supported
 F:     drivers/gpu/drm/arc/
 F:     Documentation/devicetree/bindings/display/snps,arcpgu.txt
 
+ARM ARCHITECTED TIMER DRIVER
+M:     Mark Rutland <mark.rutland@arm.com>
+M:     Marc Zyngier <marc.zyngier@arm.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+F:     arch/arm/include/asm/arch_timer.h
+F:     arch/arm64/include/asm/arch_timer.h
+F:     drivers/clocksource/arm_arch_timer.c
+
 ARM HDLCD DRM DRIVER
 M:     Liviu Dudau <liviu.dudau@arm.com>
 S:     Supported
@@ -1623,7 +1633,7 @@ N:        rockchip
 
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:     Kukjin Kim <kgene@kernel.org>
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
@@ -1643,7 +1653,6 @@ F:        drivers/*/*s3c64xx*
 F:     drivers/*/*s5pv210*
 F:     drivers/memory/samsung/*
 F:     drivers/soc/samsung/*
-F:     drivers/spi/spi-s3c*
 F:     Documentation/arm/Samsung/
 F:     Documentation/devicetree/bindings/arm/samsung/
 F:     Documentation/devicetree/bindings/sram/samsung-sram.txt
@@ -1831,6 +1840,7 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 ARM/UNIPHIER ARCHITECTURE
 M:     Masahiro Yamada <yamada.masahiro@socionext.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
 S:     Maintained
 F:     arch/arm/boot/dts/uniphier*
 F:     arch/arm/include/asm/hardware/cache-uniphier.h
@@ -2484,7 +2494,7 @@ F:        include/net/bluetooth/
 BONDING DRIVER
 M:     Jay Vosburgh <j.vosburgh@gmail.com>
 M:     Veaceslav Falico <vfalico@gmail.com>
-M:     Andy Gospodarek <gospo@cumulusnetworks.com>
+M:     Andy Gospodarek <andy@greyhouse.net>
 L:     netdev@vger.kernel.org
 W:     http://sourceforge.net/projects/bonding/
 S:     Supported
@@ -3247,7 +3257,7 @@ F:        kernel/cpuset.c
 CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
 M:     Johannes Weiner <hannes@cmpxchg.org>
 M:     Michal Hocko <mhocko@kernel.org>
-M:     Vladimir Davydov <vdavydov@virtuozzo.com>
+M:     Vladimir Davydov <vdavydov.dev@gmail.com>
 L:     cgroups@vger.kernel.org
 L:     linux-mm@kvack.org
 S:     Maintained
@@ -3268,7 +3278,7 @@ S:        Maintained
 F:     drivers/net/wan/cosa*
 
 CPMAC ETHERNET DRIVER
-M:     Florian Fainelli <florian@openwrt.org>
+M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/ethernet/ti/cpmac.c
@@ -4534,6 +4544,12 @@ L:       linux-edac@vger.kernel.org
 S:     Maintained
 F:     drivers/edac/sb_edac.c
 
+EDAC-SKYLAKE
+M:     Tony Luck <tony.luck@intel.com>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/edac/skx_edac.c
+
 EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:     Loc Ho <lho@apm.com>
@@ -7458,7 +7474,8 @@ F:        Documentation/devicetree/bindings/sound/max9860.txt
 F:     sound/soc/codecs/max9860.*
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-pm@vger.kernel.org
 S:     Supported
 F:     drivers/power/max14577_charger.c
@@ -7474,7 +7491,8 @@ F:        include/dt-bindings/*/*max77802.h
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:     Chanwoo Choi <cw00.choi@samsung.com>
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
 F:     drivers/*/max14577*.c
@@ -7664,7 +7682,7 @@ L:        linux-rdma@vger.kernel.org
 S:     Supported
 W:     https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-F:     drivers/infiniband/hw/rxe/
+F:     drivers/infiniband/sw/rxe/
 F:     include/uapi/rdma/rdma_user_rxe.h
 
 MEMBARRIER SUPPORT
@@ -9240,7 +9258,7 @@ F:        drivers/pinctrl/sh-pfc/
 
 PIN CONTROLLER - SAMSUNG
 M:     Tomasz Figa <tomasz.figa@gmail.com>
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
@@ -9681,6 +9699,12 @@ T:       git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 S:     Supported
 F:     drivers/net/wireless/ath/ath10k/
 
+QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+M:     Timur Tabi <timur@codeaurora.org>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     drivers/net/ethernet/qualcomm/emac/
+
 QUALCOMM HEXAGON ARCHITECTURE
 M:     Richard Kuo <rkuo@codeaurora.org>
 L:     linux-hexagon@vger.kernel.org
@@ -9936,6 +9960,7 @@ F:        net/rfkill/
 
 RHASHTABLE
 M:     Thomas Graf <tgraf@suug.ch>
+M:     Herbert Xu <herbert@gondor.apana.org.au>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     lib/rhashtable.c
@@ -10173,7 +10198,7 @@ S:      Maintained
 F:     drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG AUDIO (ASoC) DRIVERS
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sangbeom Kim <sbkim73@samsung.com>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -10188,7 +10213,8 @@ F:      drivers/video/fbdev/s3c-fb.c
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
 M:     Sangbeom Kim <sbkim73@samsung.com>
-M:     Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:     linux-kernel@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Supported
@@ -10247,6 +10273,17 @@ S:     Supported
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 F:     drivers/clk/samsung/
 
+SAMSUNG SPI DRIVERS
+M:     Kukjin Kim <kgene@kernel.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
+M:     Andi Shyti <andi.shyti@samsung.com>
+L:     linux-spi@vger.kernel.org
+L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
+S:     Maintained
+F:     Documentation/devicetree/bindings/spi/spi-samsung.txt
+F:     drivers/spi/spi-s3c*
+F:     include/linux/platform_data/spi-s3c64xx.h
+
 SAMSUNG SXGBE DRIVERS
 M:     Byungho An <bh74.an@samsung.com>
 M:     Girish K S <ks.giri@samsung.com>
@@ -11226,12 +11263,8 @@ S:     Odd Fixes
 F:     drivers/staging/vt665?/
 
 STAGING - WILC1000 WIFI DRIVER
-M:     Johnny Kim <johnny.kim@atmel.com>
-M:     Austin Shin <austin.shin@atmel.com>
-M:     Chris Park <chris.park@atmel.com>
-M:     Tony Cho <tony.cho@atmel.com>
-M:     Glen Lee <glen.lee@atmel.com>
-M:     Leo Kim <leo.kim@atmel.com>
+M:     Aditya Shankar <aditya.shankar@microchip.com>
+M:     Ganesh Krishna <ganesh.krishna@microchip.com>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 F:     drivers/staging/wilc1000/
@@ -12263,6 +12296,7 @@ F:      drivers/net/usb/smsc75xx.*
 
 USB SMSC95XX ETHERNET DRIVER
 M:     Steve Glendinning <steve.glendinning@shawell.net>
+M:     Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/usb/smsc95xx.*
index 5c18baa..1a8c8dd 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc6
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
index e9c9334..fd6e971 100644 (file)
@@ -336,17 +336,6 @@ config HAVE_ARCH_SECCOMP_FILTER
            results in the system call being skipped immediately.
          - seccomp syscall wired up
 
-         For best performance, an arch should use seccomp_phase1 and
-         seccomp_phase2 directly.  It should call seccomp_phase1 for all
-         syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not
-         need to be called from a ptrace-safe context.  It must then
-         call seccomp_phase2 if seccomp_phase1 returns anything other
-         than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP.
-
-         As an additional optimization, an arch may provide seccomp_data
-         directly to seccomp_phase1; this avoids multiple calls
-         to the syscall_xyz helpers for every syscall.
-
 config SECCOMP_FILTER
        def_bool y
        depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET
index ad7860c..51597f3 100644 (file)
 
 #ifdef CONFIG_ARC_CURR_IN_REG
        ; Retrieve orig r25 and save it with rest of callee_regs
-       ld.as   r12, [r12, PT_user_r25]
+       ld      r12, [r12, PT_user_r25]
        PUSH    r12
 #else
        PUSH    r25
 
        ; SP is back to start of pt_regs
 #ifdef CONFIG_ARC_CURR_IN_REG
-       st.as   r12, [sp, PT_user_r25]
+       st      r12, [sp, PT_user_r25]
 #endif
 .endm
 
index c1d3645..4c6eed8 100644 (file)
@@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void)
 .endm
 
 .macro IRQ_ENABLE  scratch
+       TRACE_ASM_IRQ_ENABLE
        lr      \scratch, [status32]
        or      \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
        flag    \scratch
-       TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif /* __ASSEMBLY__ */
index 0f92d97..89eeb37 100644 (file)
@@ -280,7 +280,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 #define pte_page(pte)          pfn_to_page(pte_pfn(pte))
 #define mk_pte(page, prot)     pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot)     (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)     __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
 #define pte_pfn(pte)           (pte_val(pte) >> PAGE_SHIFT)
index 0f99ac8..0037a58 100644 (file)
 
 /* Machine specific ELF Hdr flags */
 #define EF_ARC_OSABI_MSK       0x00000f00
-#define EF_ARC_OSABI_ORIG      0x00000000   /* MUST be zero for back-compat */
-#define EF_ARC_OSABI_CURRENT   0x00000300   /* v3 (no legacy syscalls) */
+
+#define EF_ARC_OSABI_V3                0x00000300   /* v3 (no legacy syscalls) */
+#define EF_ARC_OSABI_V4                0x00000400   /* v4 (64bit data any reg align) */
+
+#if __GNUC__ < 6
+#define EF_ARC_OSABI_CURRENT   EF_ARC_OSABI_V3
+#else
+#define EF_ARC_OSABI_CURRENT   EF_ARC_OSABI_V4
+#endif
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_fpregset_t;
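The OSABI revision lives in bits 8-11 of e_flags, which is why the setup.c hunk below prints EF_ARC_OSABI_CURRENT >> 8 as the version number. As a minimal sketch (not part of the patch; assumes the macros above and struct elf32_hdr are in scope), recovering the version a toolchain recorded looks like:

	/* Illustrative helper, not in the patch: extract the ARC OSABI
	 * version from the ELF header flags word. */
	static inline unsigned int arc_osabi_version(const struct elf32_hdr *hdr)
	{
		return (hdr->e_flags & EF_ARC_OSABI_MSK) >> 8;	/* v3 -> 3, v4 -> 4 */
	}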
index 4d9e777..000dd04 100644 (file)
@@ -28,6 +28,7 @@ extern void __muldf3(void);
 extern void __divdf3(void);
 extern void __floatunsidf(void);
 extern void __floatunsisf(void);
+extern void __udivdi3(void);
 
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__ashrdi3);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL(__muldf3);
 EXPORT_SYMBOL(__divdf3);
 EXPORT_SYMBOL(__floatunsidf);
 EXPORT_SYMBOL(__floatunsisf);
+EXPORT_SYMBOL(__udivdi3);
 
 /* ARC optimised assembler routines */
 EXPORT_SYMBOL(memset);
index b5db9e7..be1972b 100644 (file)
@@ -199,7 +199,7 @@ int elf_check_arch(const struct elf32_hdr *x)
        }
 
        eflags = x->e_flags;
-       if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
+       if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
                pr_err("ABI mismatch - you need newer toolchain\n");
                force_sigsegv(SIGSEGV, current);
                return 0;
index a946400..f52a0d0 100644 (file)
@@ -291,8 +291,10 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
                               cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
                               cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
-       n += scnprintf(buf + n, len - n,
-                      "OS ABI [v3]\t: no-legacy-syscalls\n");
+       n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
+                       EF_ARC_OSABI_CURRENT >> 8,
+                       EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
+                       "no-legacy-syscalls" : "64-bit data any register aligned");
 
        return buf;
 }
index 5a294b2..0b10efe 100644 (file)
@@ -921,6 +921,15 @@ void arc_cache_init(void)
 
        printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
 
+       /*
+        * Only the master CPU needs to execute the rest of this function:
+        *  - Assume SMP, so all cores will have the same cache config and
+        *    any geometry checks will be the same for all
+        *  - IOC setup / dma callbacks only need to be set up once
+        */
+       if (cpu)
+               return;
+
        if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
                struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 
index 04f8332..77ff64a 100644 (file)
@@ -61,6 +61,7 @@ void *kmap(struct page *page)
 
        return kmap_high(page);
 }
+EXPORT_SYMBOL(kmap);
 
 void *kmap_atomic(struct page *page)
 {
index c8609d8..b689172 100644 (file)
 
                #address-cells = <1>;
                #size-cells = <1>;
-               elm_id = <&elm>;
+               ti,elm-id = <&elm>;
        };
 };
 
index df63484..e7d9ca1 100644 (file)
 
                #address-cells = <1>;
                #size-cells = <1>;
-               elm_id = <&elm>;
+               ti,elm-id = <&elm>;
 
                /* MTD partition table */
                partition@0 {
index 86f7731..1263c9d 100644 (file)
                gpmc,wr-access-ns = <30>;
                gpmc,wr-data-mux-bus-ns = <0>;
 
-               elm_id = <&elm>;
+               ti,elm-id = <&elm>;
 
                #address-cells = <1>;
                #size-cells = <1>;
index 2e0556a..d3e6bd8 100644 (file)
 
                        port@0 {
                                reg = <0>;
-                               label = "lan1";
+                               label = "lan5";
                        };
 
                        port@1 {
                                reg = <1>;
-                               label = "lan2";
+                               label = "lan4";
                        };
 
                        port@2 {
 
                        port@3 {
                                reg = <3>;
-                               label = "lan4";
+                               label = "lan2";
                        };
 
                        port@4 {
                                reg = <4>;
-                               label = "lan5";
+                               label = "lan1";
                        };
 
                        port@5 {
index d949931..f6d1352 100644 (file)
        samsung,dw-mshc-ciu-div = <3>;
        samsung,dw-mshc-sdr-timing = <0 4>;
        samsung,dw-mshc-ddr-timing = <0 2>;
-       samsung,dw-mshc-hs400-timing = <0 2>;
-       samsung,read-strobe-delay = <90>;
        pinctrl-names = "default";
        pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus1 &sd0_bus4 &sd0_bus8 &sd0_cd>;
        bus-width = <8>;
        cap-mmc-highspeed;
        mmc-hs200-1_8v;
-       mmc-hs400-1_8v;
        vmmc-supply = <&ldo20_reg>;
        vqmmc-supply = <&ldo11_reg>;
 };
index b620ac8..b13b0b2 100644 (file)
                                        clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>,
                                                 <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>,
                                                 <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>,
-                                                <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>,
+                                                <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_DUMMY>,
                                                 <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>;
                                        clock-names = "core",  "rxtx0",
                                                      "rxtx1", "rxtx2",
index 96ea936..240a286 100644 (file)
@@ -64,7 +64,7 @@
        cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>;
        no-1-8-v;
        keep-power-in-suspend;
-       enable-sdio-wakup;
+       wakeup-source;
        status = "okay";
 };
 
index 95ee268..2f33c46 100644 (file)
                ti,y-min = /bits/ 16 <0>;
                ti,y-max = /bits/ 16 <0>;
                ti,pressure-max = /bits/ 16 <0>;
-               ti,x-plat-ohms = /bits/ 16 <400>;
+               ti,x-plate-ohms = /bits/ 16 <400>;
                wakeup-source;
        };
 };
index ef84d86..5bf6289 100644 (file)
 
        partition@e0000 {
                label = "u-boot environment";
-               reg = <0xe0000 0x100000>;
+               reg = <0xe0000 0x20000>;
        };
 
        partition@100000 {
index e4ecab1..7175511 100644 (file)
        };
 };
 
+&pciec {
+       status = "okay";
+};
+
 &pcie0 {
        status = "okay";
 };
index 365f39f..0ff1c2d 100644 (file)
        ranges = <0 0 0x00000000 0x1000000>;    /* CS0: 16MB for NAND */
 
        nand@0,0 {
-               linux,mtd-name = "micron,mt29f4g16abbda3w";
+               compatible = "ti,omap2-nand";
                reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
+               interrupt-parent = <&gpmc>;
+               interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */
+                            <1 IRQ_TYPE_NONE>; /* termcount */
+               linux,mtd-name = "micron,mt29f4g16abbda3w";
                nand-bus-width = <16>;
                ti,nand-ecc-opt = "bch8";
+               rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */
                gpmc,sync-clk-ps = <0>;
                gpmc,cs-on-ns = <0>;
                gpmc,cs-rd-off-ns = <44>;
                gpmc,wr-access-ns = <40>;
                gpmc,wr-data-mux-bus-ns = <0>;
                gpmc,device-width = <2>;
-
-               gpmc,page-burst-access-ns = <5>;
-               gpmc,cycle2cycle-delay-ns = <50>;
-
                #address-cells = <1>;
                #size-cells = <1>;
 
index 5e9a13c..1c2c746 100644 (file)
@@ -46,6 +46,7 @@
                linux,mtd-name = "micron,mt29f4g16abbda3w";
                nand-bus-width = <16>;
                ti,nand-ecc-opt = "bch8";
+               rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */
                gpmc,sync-clk-ps = <0>;
                gpmc,cs-on-ns = <0>;
                gpmc,cs-rd-off-ns = <44>;
index de256fa..3e946ca 100644 (file)
 };
 
 &gpmc {
-       ranges = <0 0 0x00000000 0x20000000>;
+       ranges = <0 0 0x30000000 0x1000000>,    /* CS0 */
+                <4 0 0x2b000000 0x1000000>,    /* CS4 */
+                <5 0 0x2c000000 0x1000000>;    /* CS5 */
 
        nand@0,0 {
                compatible = "ti,omap2-nand";
index 7df2792..4f4c6ef 100644 (file)
@@ -55,8 +55,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-       ranges = <5 0 0x2c000000 0x1000000>;    /* CS5 */
-
        ethernet@gpmc {
                reg = <5 0 0xff>;
                interrupt-parent = <&gpio6>;
index 9e24b6a..1b304e2 100644 (file)
@@ -27,8 +27,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-       ranges = <5 0 0x2c000000 0x1000000>;    /* CS5 */
-
        ethernet@gpmc {
                reg = <5 0 0xff>;
                interrupt-parent = <&gpio6>;
index 334109e..82e98ee 100644 (file)
@@ -15,9 +15,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-       ranges = <4 0 0x2b000000 0x1000000>,    /* CS4 */
-                <5 0 0x2c000000 0x1000000>;    /* CS5 */
-
        smsc1: ethernet@gpmc {
                reg = <5 0 0xff>;
                interrupt-parent = <&gpio6>;
index c0ba86c..0d0dae3 100644 (file)
                clock-names = "saradc", "apb_pclk";
                interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
                #io-channel-cells = <1>;
+               resets = <&cru SRST_SARADC>;
+               reset-names = "saradc-apb";
                status = "disabled";
        };
 
index cd33f01..91c4b3c 100644 (file)
                #io-channel-cells = <1>;
                clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
                clock-names = "saradc", "apb_pclk";
+               resets = <&cru SRST_SARADC>;
+               reset-names = "saradc-apb";
                status = "disabled";
        };
 
index 99bbcc2..e2cd683 100644 (file)
                #io-channel-cells = <1>;
                clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
                clock-names = "saradc", "apb_pclk";
+               resets = <&cru SRST_SARADC>;
+               reset-names = "saradc-apb";
                status = "disabled";
        };
 
index e012890..a17ba02 100644 (file)
@@ -84,7 +84,7 @@
                        trips {
                                cpu_alert0: cpu_alert0 {
                                        /* milliCelsius */
-                                       temperature = <850000>;
+                                       temperature = <85000>;
                                        hysteresis = <2000>;
                                        type = "passive";
                                };
index 1dfc492..1444fbd 100644 (file)
                palmas: tps65913@58 {
                        compatible = "ti,palmas";
                        reg = <0x58>;
-                       interrupts = <0 86 IRQ_TYPE_LEVEL_LOW>;
+                       interrupts = <0 86 IRQ_TYPE_LEVEL_HIGH>;
 
                        #interrupt-cells = <2>;
                        interrupt-controller;
index 70cf409..966a7fc 100644 (file)
                palmas: pmic@58 {
                        compatible = "ti,palmas";
                        reg = <0x58>;
-                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 
                        #interrupt-cells = <2>;
                        interrupt-controller;
index 17dd145..a161fa1 100644 (file)
@@ -63,7 +63,7 @@
                palmas: pmic@58 {
                        compatible = "ti,palmas";
                        reg = <0x58>;
-                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 
                        #interrupt-cells = <2>;
                        interrupt-controller;
index 6403e0d..e52b824 100644 (file)
         *   Pin 41: BR_UART1_TXD
         *   Pin 44: BR_UART1_RXD
         */
-       serial@0,70006000 {
+       serial@70006000 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
         *   Pin 71: UART2_CTS_L
         *   Pin 74: UART2_RTS_L
         */
-       serial@0,70006040 {
+       serial@70006040 {
                compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                status = "okay";
        };
index bc5f507..9f157e7 100644 (file)
@@ -295,6 +295,7 @@ __und_svc_fault:
        bl      __und_fault
 
 __und_svc_finish:
+       get_thread_info tsk
        ldr     r5, [sp, #S_PSR]                @ Get SVC cpsr
        svc_exit r5                             @ return from exception
  UNWIND(.fnend         )
index 0b1e4a9..15d073a 100644 (file)
@@ -142,6 +142,19 @@ ARM_BE8(orr        r7, r7, #(1 << 25))     @ HSCTLR.EE
        and     r7, #0x1f               @ Preserve HPMN
        mcr     p15, 4, r7, c1, c1, 1   @ HDCR
 
+       @ Make sure NS-SVC is initialised appropriately
+       mrc     p15, 0, r7, c1, c0, 0   @ SCTLR
+       orr     r7, #(1 << 5)           @ CP15 barriers enabled
+       bic     r7, #(3 << 7)           @ Clear SED/ITD for v8 (RES0 for v7)
+       bic     r7, #(3 << 19)          @ WXN and UWXN disabled
+       mcr     p15, 0, r7, c1, c0, 0   @ SCTLR
+
+       mrc     p15, 0, r7, c0, c0, 0   @ MIDR
+       mcr     p15, 4, r7, c0, c0, 0   @ VPIDR
+
+       mrc     p15, 0, r7, c0, c0, 5   @ MPIDR
+       mcr     p15, 4, r7, c0, c0, 5   @ VMPIDR
+
 #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
        @ make CNTP_* and CNTPCT accessible from PL1
        mrc     p15, 0, r7, c0, c1, 1   @ ID_PFR1
index bda27b6..29d0b23 100644 (file)
@@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        smp_rmb();
 
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
-       if (is_error_pfn(pfn))
+       if (is_error_noslot_pfn(pfn))
                return -EFAULT;
 
        if (kvm_is_device_pfn(pfn)) {
index fd87205..0df062d 100644 (file)
@@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node,
        for (i = 0; i < IMR_NUM; i++)
                writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4);
 
+       /*
+        * Clear the OF_POPULATED flag set in of_irq_init so that the
+        * GPC power domain driver is not skipped later.
+        */
+       of_node_clear_flag(node, OF_POPULATED);
+
        return 0;
 }
 IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init);
index 5d9bfab..6bb7d9c 100644 (file)
@@ -64,6 +64,7 @@ static void __init imx6ul_init_machine(void)
        if (parent == NULL)
                pr_warn("failed to initialize soc device\n");
 
+       of_platform_default_populate(NULL, NULL, parent);
        imx6ul_enet_init();
        imx_anatop_init();
        imx6ul_pm_init();
index 58924b3..fe708e2 100644 (file)
@@ -295,7 +295,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode)
                val &= ~BM_CLPCR_SBYOS;
                if (cpu_is_imx6sl())
                        val |= BM_CLPCR_BYPASS_PMIC_READY;
-               if (cpu_is_imx6sl() || cpu_is_imx6sx())
+               if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
                        val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
                else
                        val |= BM_CLPCR_BYP_MMDC_CH1_LPM_HS;
@@ -310,7 +310,7 @@ int imx6_set_lpm(enum mxc_cpu_pwr_mode mode)
                val |= 0x3 << BP_CLPCR_STBY_COUNT;
                val |= BM_CLPCR_VSTBY;
                val |= BM_CLPCR_SBYOS;
-               if (cpu_is_imx6sl())
+               if (cpu_is_imx6sl() || cpu_is_imx6sx())
                        val |= BM_CLPCR_BYPASS_PMIC_READY;
                if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
                        val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
index c073fb5..6f2d0ae 100644 (file)
@@ -220,9 +220,6 @@ static int am33xx_cm_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs,
 {
        int i = 0;
 
-       if (!clkctrl_offs)
-               return 0;
-
        omap_test_timeout(_is_module_ready(inst, clkctrl_offs),
                          MAX_MODULE_READY_TIME, i);
 
@@ -246,9 +243,6 @@ static int am33xx_cm_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs,
 {
        int i = 0;
 
-       if (!clkctrl_offs)
-               return 0;
-
        omap_test_timeout((_clkctrl_idlest(inst, clkctrl_offs) ==
                                CLKCTRL_IDLEST_DISABLED),
                                MAX_MODULE_READY_TIME, i);
index 2c0e07e..2ab27ad 100644 (file)
@@ -278,9 +278,6 @@ static int omap4_cminst_wait_module_ready(u8 part, s16 inst, u16 clkctrl_offs,
 {
        int i = 0;
 
-       if (!clkctrl_offs)
-               return 0;
-
        omap_test_timeout(_is_module_ready(part, inst, clkctrl_offs),
                          MAX_MODULE_READY_TIME, i);
 
@@ -304,9 +301,6 @@ static int omap4_cminst_wait_module_idle(u8 part, s16 inst, u16 clkctrl_offs,
 {
        int i = 0;
 
-       if (!clkctrl_offs)
-               return 0;
-
        omap_test_timeout((_clkctrl_idlest(part, inst, clkctrl_offs) ==
                           CLKCTRL_IDLEST_DISABLED),
                          MAX_MODULE_DISABLE_TIME, i);
index 5b70938..1052b29 100644 (file)
@@ -1053,6 +1053,10 @@ static int _omap4_wait_target_disable(struct omap_hwmod *oh)
        if (oh->flags & HWMOD_NO_IDLEST)
                return 0;
 
+       if (!oh->prcm.omap4.clkctrl_offs &&
+           !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET))
+               return 0;
+
        return omap_cm_wait_module_idle(oh->clkdm->prcm_partition,
                                        oh->clkdm->cm_inst,
                                        oh->prcm.omap4.clkctrl_offs, 0);
@@ -2971,6 +2975,10 @@ static int _omap4_wait_target_ready(struct omap_hwmod *oh)
        if (!_find_mpu_rt_port(oh))
                return 0;
 
+       if (!oh->prcm.omap4.clkctrl_offs &&
+           !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET))
+               return 0;
+
        /* XXX check module SIDLEMODE, hardreset status */
 
        return omap_cm_wait_module_ready(oh->clkdm->prcm_partition,
index 4041bad..7890401 100644 (file)
@@ -443,8 +443,12 @@ struct omap_hwmod_omap2_prcm {
  * HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT: Some IP blocks don't have a PRCM
  *     module-level context loss register associated with them; this
  *     flag bit should be set in those cases
+ * HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET: Some IP blocks have a valid CLKCTRL
+ *     offset of zero; this flag bit should be set in those cases to
+ *     distinguish from hwmods that have no clkctrl offset.
  */
 #define HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT                (1 << 0)
+#define HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET                (1 << 1)
 
 /**
  * struct omap_hwmod_omap4_prcm - OMAP4-specific PRCM data
index 55c5878..e2d84aa 100644 (file)
@@ -29,6 +29,7 @@
 #define CLKCTRL(oh, clkctrl) ((oh).prcm.omap4.clkctrl_offs = (clkctrl))
 #define RSTCTRL(oh, rstctrl) ((oh).prcm.omap4.rstctrl_offs = (rstctrl))
 #define RSTST(oh, rstst) ((oh).prcm.omap4.rstst_offs = (rstst))
+#define PRCM_FLAGS(oh, flag) ((oh).prcm.omap4.flags = (flag))
 
 /*
  * 'l3' class
@@ -1296,6 +1297,7 @@ static void omap_hwmod_am33xx_clkctrl(void)
        CLKCTRL(am33xx_i2c1_hwmod, AM33XX_CM_WKUP_I2C0_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_wd_timer1_hwmod, AM33XX_CM_WKUP_WDT1_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_rtc_hwmod, AM33XX_CM_RTC_RTC_CLKCTRL_OFFSET);
+       PRCM_FLAGS(am33xx_rtc_hwmod, HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_mmc2_hwmod, AM33XX_CM_PER_MMC2_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_gpmc_hwmod, AM33XX_CM_PER_GPMC_CLKCTRL_OFFSET);
        CLKCTRL(am33xx_l4_ls_hwmod, AM33XX_CM_PER_L4LS_CLKCTRL_OFFSET);
index d72ee61..1cc4a6f 100644 (file)
@@ -722,8 +722,20 @@ static struct omap_hwmod omap3xxx_dss_dispc_hwmod = {
  * display serial interface controller
  */
 
+static struct omap_hwmod_class_sysconfig omap3xxx_dsi_sysc = {
+       .rev_offs       = 0x0000,
+       .sysc_offs      = 0x0010,
+       .syss_offs      = 0x0014,
+       .sysc_flags     = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+                          SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE |
+                          SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+       .idlemodes      = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+       .sysc_fields    = &omap_hwmod_sysc_type1,
+};
+
 static struct omap_hwmod_class omap3xxx_dsi_hwmod_class = {
        .name = "dsi",
+       .sysc   = &omap3xxx_dsi_sysc,
 };
 
 static struct omap_hwmod_irq_info omap3xxx_dsi1_irqs[] = {
index c410d84..66070ac 100644 (file)
@@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_USE_DMA | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
index 3f06cd9..056369e 100644 (file)
@@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata xcep_smc91x_info = {
-       .flags  = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
+       .flags  = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                 SMC91X_NOWAIT | SMC91X_USE_DMA,
 };
 
 static struct platform_device smc91x_device = {
index baf1745..a0ead0a 100644 (file)
@@ -93,7 +93,8 @@ static struct smsc911x_platform_config smsc911x_config = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
 };
 
 static struct platform_device realview_eth_device = {
index cbf53bb..0db4689 100644 (file)
@@ -125,6 +125,8 @@ static unsigned long clk_36864_get_rate(struct clk *clk)
 }
 
 static struct clkops clk_36864_ops = {
+       .enable         = clk_cpu_enable,
+       .disable        = clk_cpu_disable,
        .get_rate       = clk_36864_get_rate,
 };
 
@@ -140,9 +142,8 @@ static struct clk_lookup sa11xx_clkregs[] = {
        CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
 };
 
-static int __init sa11xx_clk_init(void)
+int __init sa11xx_clk_init(void)
 {
        clkdev_add_table(sa11xx_clkregs, ARRAY_SIZE(sa11xx_clkregs));
        return 0;
 }
-core_initcall(sa11xx_clk_init);
index 345e63f..3e09bed 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
+#include <mach/reset.h>
 
 #include "generic.h"
 #include <clocksource/pxa.h>
@@ -95,6 +96,8 @@ static void sa1100_power_off(void)
 
 void sa11x0_restart(enum reboot_mode mode, const char *cmd)
 {
+       clear_reset_status(RESET_STATUS_ALL);
+
        if (mode == REBOOT_SOFT) {
                /* Jump into ROM at address 0 */
                soft_restart(0);
@@ -388,6 +391,7 @@ void __init sa1100_init_irq(void)
        sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start);
 
        sa1100_init_gpio();
+       sa11xx_clk_init();
 }
 
 /*
index 0d92e11..68199b6 100644 (file)
@@ -44,3 +44,5 @@ int sa11x0_pm_init(void);
 #else
 static inline int sa11x0_pm_init(void) { return 0; }
 #endif
+
+int sa11xx_clk_init(void);
index 1525d7b..88149f8 100644 (file)
@@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
index 62f4d01..6344913 100644 (file)
@@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz)
 {
        void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
 
-       BUG_ON(!ptr);
+       if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+               BUG();
        return ptr;
 }
 
@@ -1155,10 +1156,19 @@ void __init sanity_check_meminfo(void)
 {
        phys_addr_t memblock_limit = 0;
        int highmem = 0;
-       phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+       u64 vmalloc_limit;
        struct memblock_region *reg;
        bool should_use_highmem = false;
 
+       /*
+        * Let's use our own (unoptimized) equivalent of __pa() that is
+        * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
+        * The result is used as the upper bound on physical memory
+        * addresses and may itself be outside the valid range for which
+        * phys_addr_t, and therefore __pa(), is defined.
+        */
+       vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
+
        for_each_memblock(memory, reg) {
                phys_addr_t block_start = reg->base;
                phys_addr_t block_end = reg->base + reg->size;
@@ -1183,10 +1193,11 @@ void __init sanity_check_meminfo(void)
                        if (reg->size > size_limit) {
                                phys_addr_t overlap_size = reg->size - size_limit;
 
-                               pr_notice("Truncating RAM at %pa-%pa to -%pa",
-                                         &block_start, &block_end, &vmalloc_limit);
-                               memblock_remove(vmalloc_limit, overlap_size);
+                               pr_notice("Truncating RAM at %pa-%pa",
+                                         &block_start, &block_end);
                                block_end = vmalloc_limit;
+                               pr_cont(" to -%pa", &block_end);
+                               memblock_remove(vmalloc_limit, overlap_size);
                                should_use_highmem = true;
                        }
                }
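To see why the u64 arithmetic matters, here is a worked example with hypothetical values (not taken from the patch):

	/*
	 * Assume vmalloc_min = 0xf0000000, PAGE_OFFSET = 0xc0000000,
	 * PHYS_OFFSET = 0xe0000000, and a 32-bit phys_addr_t.
	 *
	 *   __pa() form:  0xf0000000 - 0xc0000000 + 0xe0000000 = 0x10000000  (wrapped)
	 *   u64 form:     0xf0000000 - 0xc0000000 + 0xe0000000 = 0x110000000
	 *
	 * The wrapped value would make the truncation logic above remove the
	 * wrong range; the u64 result compares correctly against block_end.
	 */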
index a7123b4..d00d52c 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/memory.h>
 
 #include "proc-macros.S"
 
index b0b82f5..3d2cef6 100644 (file)
@@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 static struct vcpu_info __percpu *xen_vcpu_info;
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 /* These are unused until we support booting "pre-ballooned" */
index d02a900..4f44d11 100644 (file)
                #io-channel-cells = <1>;
                clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
                clock-names = "saradc", "apb_pclk";
+               resets = <&cru SRST_SARADC>;
+               reset-names = "saradc-apb";
                status = "disabled";
        };
 
index 0a456be..2fee2f5 100644 (file)
@@ -199,19 +199,19 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 #define _percpu_read(pcp)                                              \
 ({                                                                     \
        typeof(pcp) __retval;                                           \
-       preempt_disable();                                              \
+       preempt_disable_notrace();                                      \
        __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)),      \
                                              sizeof(pcp));             \
-       preempt_enable();                                               \
+       preempt_enable_notrace();                                       \
        __retval;                                                       \
 })
 
 #define _percpu_write(pcp, val)                                                \
 do {                                                                   \
-       preempt_disable();                                              \
+       preempt_disable_notrace();                                      \
        __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val),       \
                                sizeof(pcp));                           \
-       preempt_enable();                                               \
+       preempt_enable_notrace();                                       \
 } while(0)                                                             \
 
 #define _pcp_protect(operation, pcp, val)                      \
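The switch to the _notrace variants matters because preempt_disable()/preempt_enable() are themselves traceable: a function-trace callback that touches a percpu variable through these accessors would otherwise recurse into the tracer. A hedged sketch of such a caller (the counter and callback are hypothetical):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, trace_hits);	/* hypothetical counter */

	static void my_trace_callback(void)			/* hypothetical hook */
	{
		/* On arm64, this_cpu_read()/this_cpu_write() resolve to the
		 * _percpu_read()/_percpu_write() macros above; with the
		 * _notrace preempt helpers this bookkeeping stays invisible
		 * to the function tracer instead of re-entering it. */
		this_cpu_write(trace_hits, this_cpu_read(trace_hits) + 1);
	}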
index e875a5a..89206b5 100644 (file)
@@ -363,4 +363,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 #define arch_read_relax(lock)  cpu_relax()
 #define arch_write_relax(lock) cpu_relax()
 
+/*
+ * Accesses appearing in program order before a spin_lock() operation
+ * can be reordered with accesses inside the critical section, by virtue
+ * of arch_spin_lock being constructed using acquire semantics.
+ *
+ * In cases where this is problematic (e.g. try_to_wake_up), an
+ * smp_mb__before_spinlock() can restore the required ordering.
+ */
+#define smp_mb__before_spinlock()      smp_mb()
+
 #endif /* __ASM_SPINLOCK_H */
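A minimal sketch of the pattern the comment refers to, modelled on the try_to_wake_up() usage (the flag field is hypothetical):

	WRITE_ONCE(p->wakeup_pending, 1);	/* hypothetical flag */
	smp_mb__before_spinlock();		/* upgrade acquire to a full barrier */
	raw_spin_lock(&p->pi_lock);
	/* Reads inside the critical section can no longer be reordered
	 * before the store to wakeup_pending above. */
	raw_spin_unlock(&p->pi_lock);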
index b77f583..3e7b050 100644 (file)
@@ -757,6 +757,9 @@ ENTRY(__enable_mmu)
        isb
        bl      __create_page_tables            // recreate kernel mapping
 
+       tlbi    vmalle1                         // Remove any stale TLB entries
+       dsb     nsh
+
        msr     sctlr_el1, x19                  // re-enable the MMU
        isb
        ic      iallu                           // flush instructions fetched
index 9a3aec9..ccf79d8 100644 (file)
@@ -101,12 +101,20 @@ ENTRY(cpu_resume)
        bl      el2_setup               // if in EL2 drop to EL1 cleanly
        /* enable the MMU early - so we can access sleep_save_stash by va */
        adr_l   lr, __enable_mmu        /* __cpu_setup will return here */
-       ldr     x27, =_cpu_resume       /* __enable_mmu will branch here */
+       adr_l   x27, _resume_switched   /* __enable_mmu will branch here */
        adrp    x25, idmap_pg_dir
        adrp    x26, swapper_pg_dir
        b       __cpu_setup
 ENDPROC(cpu_resume)
 
+       .pushsection    ".idmap.text", "ax"
+_resume_switched:
+       ldr     x8, =_cpu_resume
+       br      x8
+ENDPROC(_resume_switched)
+       .ltorg
+       .popsection
+
 ENTRY(_cpu_resume)
        mrs     x1, mpidr_el1
        adrp    x8, mpidr_hash
index ae7855f..5a84b45 100644 (file)
@@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
 
        /*
         * We must restore the 32-bit state before the sysregs, thanks
-        * to Cortex-A57 erratum #852523.
+        * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
         */
        __sysreg32_restore_state(vcpu);
        __sysreg_restore_guest_state(guest_ctxt);
index b0b225c..e51367d 100644 (file)
@@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
  *
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
  * Debug handling: We do trap most, if not all debug related system
  * registers. The implementation is good enough to ensure that a guest
  * can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = {
        { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 
        /* ICC_SRE */
-       { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+       { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
 
        { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
index f94b80e..9c3e75d 100644 (file)
@@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 {
-       pte_t *pte = pte_offset_kernel(pmd, 0);
+       pte_t *pte = pte_offset_kernel(pmd, 0UL);
        unsigned long addr;
        unsigned i;
 
@@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
-       pmd_t *pmd = pmd_offset(pud, 0);
+       pmd_t *pmd = pmd_offset(pud, 0UL);
        unsigned long addr;
        unsigned i;
 
@@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 
 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 {
-       pud_t *pud = pud_offset(pgd, 0);
+       pud_t *pud = pud_offset(pgd, 0UL);
        unsigned long addr;
        unsigned i;
 
index c7fe3ec..5bb15ea 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 
+#include <asm/acpi.h>
+
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 nodemask_t numa_nodes_parsed __initdata;
index 5bb61de..9d37e96 100644 (file)
@@ -100,7 +100,16 @@ ENTRY(cpu_do_resume)
 
        msr     tcr_el1, x8
        msr     vbar_el1, x9
+
+       /*
+        * __cpu_setup() cleared MDSCR_EL1.MDE and friends before unmasking
+        * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug
+        * exception. Mask them until local_dbg_restore() in cpu_suspend()
+        * resets them.
+        */
+       disable_dbg
        msr     mdscr_el1, x10
+
        msr     sctlr_el1, x12
        /*
         * Restore oslsr_el1 by writing oslar_el1
index c6db52b..10c5777 100644 (file)
@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
        .leda = RPC_LED_100_10,
        .ledb = RPC_LED_TX_RX,
 };
index f35525b..57d1c43 100644 (file)
@@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-       .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+       .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+                SMC91X_NOWAIT,
        .leda = RPC_LED_100_10,
        .ledb = RPC_LED_TX_RX,
 };
index 465c709..0472927 100644 (file)
@@ -241,8 +241,7 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
-       if (!__builtin_constant_p(count))
-               check_object_size(from, count, true);
+       check_object_size(from, count, true);
 
        return __copy_user(to, (__force void __user *) from, count);
 }
@@ -250,8 +249,7 @@ __copy_to_user (void __user *to, const void *from, unsigned long count)
 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
-       if (!__builtin_constant_p(count))
-               check_object_size(to, count, false);
+       check_object_size(to, count, false);
 
        return __copy_user((__force void __user *) to, from, count);
 }
@@ -265,8 +263,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
        long __cu_len = (n);                                                            \
                                                                                        \
        if (__access_ok(__cu_to, __cu_len, get_fs())) {                                 \
-               if (!__builtin_constant_p(n))                                           \
-                       check_object_size(__cu_from, __cu_len, true);                   \
+               check_object_size(__cu_from, __cu_len, true);                   \
                __cu_len = __copy_user(__cu_to, (__force void __user *)  __cu_from, __cu_len);  \
        }                                                                               \
        __cu_len;                                                                       \
@@ -280,8 +277,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
                                                                                        \
        __chk_user_ptr(__cu_from);                                                      \
        if (__access_ok(__cu_from, __cu_len, get_fs())) {                               \
-               if (!__builtin_constant_p(n))                                           \
-                       check_object_size(__cu_to, __cu_len, false);                    \
+               check_object_size(__cu_to, __cu_len, false);                    \
                __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);   \
        }                                                                               \
        __cu_len;                                                                       \
index ea0cd97..5f98759 100644 (file)
@@ -164,7 +164,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
  */
 static inline unsigned long ___pa(unsigned long x)
 {
-       if (config_enabled(CONFIG_64BIT)) {
+       if (IS_ENABLED(CONFIG_64BIT)) {
                /*
                 * For MIPS64 the virtual address may either be in one of
                 * the compatibility segements ckseg0 or ckseg1, or it may
@@ -173,7 +173,7 @@ static inline unsigned long ___pa(unsigned long x)
                return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
        }
 
-       if (!config_enabled(CONFIG_EVA)) {
+       if (!IS_ENABLED(CONFIG_EVA)) {
                /*
                 * We're using the standard MIPS32 legacy memory map, ie.
                 * the address x is going to be in kseg0 or kseg1. We can
index 6cfdcf5..121008c 100644 (file)
@@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
        srcu_idx = srcu_read_lock(&kvm->srcu);
        pfn = gfn_to_pfn(kvm, gfn);
 
-       if (is_error_pfn(pfn)) {
+       if (is_error_noslot_pfn(pfn)) {
                kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
                err = -EFAULT;
                goto out;
index cd87781..af12c2d 100644 (file)
@@ -1,6 +1,5 @@
 config PARISC
        def_bool y
-       select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
        select ARCH_MIGHT_HAVE_PC_PARPORT
        select HAVE_IDE
        select HAVE_OPROFILE
index 1a8f6f9..f6a4c01 100644 (file)
@@ -245,7 +245,6 @@ CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_PROVE_RCU_DELAY=y
 CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_KEYS=y
 # CONFIG_CRYPTO_HW is not set
 CONFIG_FONTS=y
index 7e07926..c564e6e 100644 (file)
@@ -291,7 +291,6 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
index 0f59fd9..e915048 100644 (file)
@@ -208,13 +208,13 @@ unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned lo
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
-extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-        __compiletime_error("copy_from_user() buffer size is not provably correct")
-#else
-        __compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+       WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
 
 static inline unsigned long __must_check copy_from_user(void *to,
                                           const void __user *from,
@@ -223,10 +223,12 @@ static inline unsigned long __must_check copy_from_user(void *to,
         int sz = __compiletime_object_size(to);
         int ret = -EFAULT;
 
-        if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n))
+        if (likely(sz == -1 || sz >= n))
                 ret = __copy_from_user(to, from, n);
-        else
-                copy_from_user_overflow();
+        else if (!__builtin_constant_p(n))
+               copy_user_overflow(sz, n);
+       else
+                __bad_copy_user();
 
         return ret;
 }
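As a hedged illustration of how the three branches above fire (ubuf and len are hypothetical; sz comes from __compiletime_object_size()):

	char dst[16];

	copy_from_user(dst, ubuf, 8);	/* sz >= n: plain __copy_from_user() */
	copy_from_user(dst, ubuf, len);	/* non-constant n: runtime WARN via
					 * copy_user_overflow() if len > 16 */
	copy_from_user(dst, ubuf, 32);	/* constant oversized n: build fails
					 * with __bad_copy_user() */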
index c0ae625..274d5bc 100644 (file)
 #define        ENOTCONN        235     /* Transport endpoint is not connected */
 #define        ESHUTDOWN       236     /* Cannot send after transport endpoint shutdown */
 #define        ETOOMANYREFS    237     /* Too many references: cannot splice */
-#define EREFUSED       ECONNREFUSED    /* for HP's NFS apparently */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
-#define EREMOTERELEASE 240     /* Remote peer released connection */
+#define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
+#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index 5adc339..0c2a94a 100644 (file)
@@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency);
 
 DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
 
-extern int update_cr16_clocksource(void);      /* from time.c */
-
 /*
 **     PARISC CPU driver - claim "device" and initialize CPU data structures.
 **
@@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev)
        }
 #endif
 
-       /* If we've registered more than one cpu,
-        * we'll use the jiffies clocksource since cr16
-        * is not synchronized between CPUs.
-        */
-       update_cr16_clocksource();
-
        return 0;
 }
 
index 505cf1a..4b0b963 100644 (file)
@@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = {
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-int update_cr16_clocksource(void)
-{
-       /* since the cr16 cycle counters are not synchronized across CPUs,
-          we'll check if we should switch to a safe clocksource: */
-       if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) {
-               clocksource_change_rating(&clocksource_cr16, 0);
-               return 1;
-       }
-
-       return 0;
-}
-
 void __init start_cpu_itimer(void)
 {
        unsigned int cpu = smp_processor_id();
index 666bef4..9377bdf 100644 (file)
@@ -3,6 +3,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * Mapping of threads to cores
index 88b4901..85b7a1a 100644 (file)
@@ -21,7 +21,7 @@
 #ifndef __ASM_PPC64_HMI_H__
 #define __ASM_PPC64_HMI_H__
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 
 #define        CORE_TB_RESYNC_REQ_BIT          63
 #define MAX_SUBCORE_PER_CORE           4
index 148303e..6a6792b 100644 (file)
@@ -183,11 +183,6 @@ struct paca_struct {
         */
        u16 in_mce;
        u8 hmi_event_available;          /* HMI event is available */
-       /*
-        * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
-        * more details
-        */
-       struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
        /* Stuff for accurate time accounting */
@@ -202,6 +197,13 @@ struct paca_struct {
        struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 #endif
        struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       /*
+        * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+        * more details
+        */
+       struct sibling_subcore_state *sibling_subcore_state;
+#endif
 #endif
 };
 
index b5e88e4..c0309c5 100644 (file)
@@ -301,6 +301,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
 /* Allocate & free a PCI host bridge structure */
 extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
 extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
 
 #ifdef CONFIG_PCI
 extern int pcibios_vaddr_is_ioport(void __iomem *address);
index c1dc6c1..f1e3824 100644 (file)
@@ -311,14 +311,12 @@ static inline unsigned long copy_from_user(void *to,
        unsigned long over;
 
        if (access_ok(VERIFY_READ, from, n)) {
-               if (!__builtin_constant_p(n))
-                       check_object_size(to, n, false);
+               check_object_size(to, n, false);
                return __copy_tofrom_user((__force void __user *)to, from, n);
        }
        if ((unsigned long)from < TASK_SIZE) {
                over = (unsigned long)from + n - TASK_SIZE;
-               if (!__builtin_constant_p(n - over))
-                       check_object_size(to, n - over, false);
+               check_object_size(to, n - over, false);
                return __copy_tofrom_user((__force void __user *)to, from,
                                n - over) + over;
        }
@@ -331,14 +329,12 @@ static inline unsigned long copy_to_user(void __user *to,
        unsigned long over;
 
        if (access_ok(VERIFY_WRITE, to, n)) {
-               if (!__builtin_constant_p(n))
-                       check_object_size(from, n, true);
+               check_object_size(from, n, true);
                return __copy_tofrom_user(to, (__force void __user *)from, n);
        }
        if ((unsigned long)to < TASK_SIZE) {
                over = (unsigned long)to + n - TASK_SIZE;
-               if (!__builtin_constant_p(n))
-                       check_object_size(from, n - over, true);
+               check_object_size(from, n - over, true);
                return __copy_tofrom_user(to, (__force void __user *)from,
                                n - over) + over;
        }
@@ -383,8 +379,7 @@ static inline unsigned long __copy_from_user_inatomic(void *to,
                        return 0;
        }
 
-       if (!__builtin_constant_p(n))
-               check_object_size(to, n, false);
+       check_object_size(to, n, false);
 
        return __copy_tofrom_user((__force void __user *)to, from, n);
 }
@@ -412,8 +407,8 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to,
                if (ret == 0)
                        return 0;
        }
-       if (!__builtin_constant_p(n))
-               check_object_size(from, n, true);
+
+       check_object_size(from, n, true);
 
        return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
index b2027a5..fe4c075 100644 (file)
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32)          += vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o hmi.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)    += exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)            += vdso64/
 obj-$(CONFIG_ALTIVEC)          += vecemu.o
index 6b8bc0d..5afd03e 100644 (file)
@@ -368,13 +368,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 tabort_syscall:
        /* Firstly we need to enable TM in the kernel */
        mfmsr   r10
-       li      r13, 1
-       rldimi  r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+       li      r9, 1
+       rldimi  r10, r9, MSR_TM_LG, 63-MSR_TM_LG
        mtmsrd  r10, 0
 
        /* tabort, this dooms the transaction, nothing else */
-       li      r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-       TABORT(R13)
+       li      r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+       TABORT(R9)
 
        /*
         * Return directly to userspace. We have corrupted user register state,
@@ -382,8 +382,8 @@ tabort_syscall:
         * resume after the tbegin of the aborted transaction with the
         * checkpointed register state.
         */
-       li      r13, MSR_RI
-       andc    r10, r10, r13
+       li      r9, MSR_RI
+       andc    r10, r10, r9
        mtmsrd  r10, 1
        mtspr   SPRN_SRR0, r11
        mtspr   SPRN_SRR1, r12
index df6d45e..bffec73 100644 (file)
@@ -485,7 +485,23 @@ machine_check_fwnmi:
        EXCEPTION_PROLOG_0(PACA_EXMC)
 machine_check_pSeries_0:
        EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
-       EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
+       /*
+        * The following is essentially EXCEPTION_PROLOG_PSERIES_1, with the
+        * difference that MSR_RI is not enabled: PACA_EXMC is in use, and a
+        * nested machine check would corrupt it. machine_check_common
+        * enables MSR_RI.
+        */
+       ld      r12,PACAKBASE(r13)
+       ld      r10,PACAKMSR(r13)
+       xori    r10,r10,MSR_RI
+       mfspr   r11,SPRN_SRR0
+       LOAD_HANDLER(r12, machine_check_common)
+       mtspr   SPRN_SRR0,r12
+       mfspr   r12,SPRN_SRR1
+       mtspr   SPRN_SRR1,r10
+       rfid
+       b       .       /* prevent speculative execution */
+
        KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
        KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
        KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -969,14 +985,17 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 machine_check_common:
 
        mfspr   r10,SPRN_DAR
-       std     r10,PACA_EXGEN+EX_DAR(r13)
+       std     r10,PACA_EXMC+EX_DAR(r13)
        mfspr   r10,SPRN_DSISR
-       stw     r10,PACA_EXGEN+EX_DSISR(r13)
+       stw     r10,PACA_EXMC+EX_DSISR(r13)
        EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
        FINISH_NAP
        RECONCILE_IRQ_STATE(r10, r11)
-       ld      r3,PACA_EXGEN+EX_DAR(r13)
-       lwz     r4,PACA_EXGEN+EX_DSISR(r13)
+       ld      r3,PACA_EXMC+EX_DAR(r13)
+       lwz     r4,PACA_EXMC+EX_DSISR(r13)
+       /* Enable MSR_RI when finished with PACA_EXMC */
+       li      r10,MSR_RI
+       mtmsrd  r10,1
        std     r3,_DAR(r1)
        std     r4,_DSISR(r1)
        bl      save_nvgprs
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c
deleted file mode 100644 (file)
index e3f738e..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Hypervisor Maintenance Interrupt (HMI) handling.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.
- *
- * Copyright 2015 IBM Corporation
- * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
- */
-
-#undef DEBUG
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-#include <asm/paca.h>
-#include <asm/hmi.h>
-
-void wait_for_subcore_guest_exit(void)
-{
-       int i;
-
-       /*
-        * NULL bitmap pointer indicates that KVM module hasn't
-        * been loaded yet and hence no guests are running.
-        * If no KVM is in use, no need to co-ordinate among threads
-        * as all of them will always be in host and no one is going
-        * to modify TB other than the opal hmi handler.
-        * Hence, just return from here.
-        */
-       if (!local_paca->sibling_subcore_state)
-               return;
-
-       for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
-               while (local_paca->sibling_subcore_state->in_guest[i])
-                       cpu_relax();
-}
-
-void wait_for_tb_resync(void)
-{
-       if (!local_paca->sibling_subcore_state)
-               return;
-
-       while (test_bit(CORE_TB_RESYNC_REQ_BIT,
-                               &local_paca->sibling_subcore_state->flags))
-               cpu_relax();
-}
index 3ed8ec0..e785cc9 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 #include <asm/code-patching.h>
index 7fdf324..e589080 100644 (file)
@@ -153,6 +153,42 @@ void pcibios_free_controller(struct pci_controller *phb)
 }
 EXPORT_SYMBOL_GPL(pcibios_free_controller);
 
+/*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ *                             pcibios_free_controller_deferred,
+ *                             (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+       struct pci_controller *phb = (struct pci_controller *)
+                                        bridge->release_data;
+
+       pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+       pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
+
 /*
  * The function is used to return the minimal alignment
  * for memory or I/O windows of the associated P2P bridge.
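A hedged sketch of the registration that the pcibios_free_controller_deferred() comment above describes, using the real pcibios_root_bridge_prepare() hook but a schematic body:

	int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
	{
		struct pci_controller *phb = pci_bus_to_host(bridge->bus);

		/* Defer freeing the phb until the last reference to the
		 * root bus is dropped; do not call pcibios_free_controller()
		 * directly after this point. */
		pci_set_host_bridge_release(bridge,
					    pcibios_free_controller_deferred,
					    (void *) phb);
		return 0;
	}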
index 4e74fc5..d3eff99 100644 (file)
@@ -695,7 +695,7 @@ unsigned char ibm_architecture_vec[] = {
        OV4_MIN_ENT_CAP,                /* minimum VP entitled capacity */
 
        /* option vector 5: PAPR/OF options */
-       VECTOR_LENGTH(18),              /* length */
+       VECTOR_LENGTH(21),              /* length */
        0,                              /* don't ignore, don't halt */
        OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
        OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
@@ -726,8 +726,11 @@ unsigned char ibm_architecture_vec[] = {
        0,
        0,
        OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
-       OV5_FEAT(OV5_PFO_HW_842),
-       OV5_FEAT(OV5_SUB_PROCESSORS),
+       OV5_FEAT(OV5_PFO_HW_842),                               /* Byte 17 */
+       0,                                                      /* Byte 18 */
+       0,                                                      /* Byte 19 */
+       0,                                                      /* Byte 20 */
+       OV5_FEAT(OV5_SUB_PROCESSORS),                           /* Byte 21 */
 
        /* option vector 6: IBM PAPR hints */
        VECTOR_LENGTH(3),               /* length */
index b6aa378..a7daf74 100644 (file)
@@ -1226,7 +1226,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
                (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
        if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
                goto bad;
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       /*
+        * If there is a transactional state then throw it away.
+        * The purpose of a sigreturn is to destroy all traces of the
+        * signal frame; this includes any transactional state created
+        * within it. We only check for suspended as we can never be
+        * active in the kernel; if we somehow are, there is nothing
+        * better to do than go ahead and Bad Thing later.
+        * The cause is not important as there will never be a
+        * recheckpoint so it's not user visible.
+        */
+       if (MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(0);
+
        if (__get_user(tmp, &rt_sf->uc.uc_link))
                goto bad;
        uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
index 7e49984..70409bb 100644 (file)
@@ -676,7 +676,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
        if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
                goto badframe;
        set_current_blocked(&set);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       /*
+        * If there is a transactional state then throw it away.
+        * The purpose of a sigreturn is to destroy all traces of the
+        * signal frame; this includes any transactional state created
+        * within it. We only check for suspended as we can never be
+        * active in the kernel; if we are active, there is nothing better
+        * to do than go ahead and Bad Thing later.
+        * The cause is not important as there will never be a
+        * recheckpoint so it's not user visible.
+        */
+       if (MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(0);
+
        if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
                goto badframe;
        if (MSR_TM_ACTIVE(msr)) {
index 25a3905..9c6f3fd 100644 (file)
@@ -830,7 +830,7 @@ int __cpu_disable(void)
 
        /* Update sibling maps */
        base = cpu_first_thread_sibling(cpu);
-       for (i = 0; i < threads_per_core; i++) {
+       for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
                cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
                cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
                cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
index 2cb5892..62859eb 100644 (file)
@@ -25,7 +25,8 @@
 #include <linux/user.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/module.h>      /* print_modules */
 #include <linux/prctl.h>
 #include <linux/delay.h>
 #include <linux/kprobes.h>
index 1f9e552..855d4b9 100644 (file)
@@ -78,6 +78,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+       book3s_hv_hmi.o \
        book3s_hv_rmhandlers.o \
        book3s_hv_rm_mmu.o \
        book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
new file mode 100644 (file)
index 0000000..e3f738e
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+       int i;
+
+       /*
+        * A NULL bitmap pointer indicates that the KVM module hasn't
+        * been loaded yet, and hence no guests are running.
+        * If KVM is not in use, there is no need to coordinate among
+        * threads, as all of them will always be in the host and
+        * nothing other than the OPAL HMI handler will modify the TB.
+        * Hence, just return from here.
+        */
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+               while (local_paca->sibling_subcore_state->in_guest[i])
+                       cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+                               &local_paca->sibling_subcore_state->flags))
+               cpu_relax();
+}
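To make the handshake concrete, here is a toy sketch of the producer side these spin loops wait on; it is purely illustrative (the real in_guest[] marking happens in the HV real-mode entry/exit paths, not in C like this, and example_mark_subcore is a hypothetical name):

        /* Hypothetical sketch: mark a subcore as in/out of a guest. */
        static void example_mark_subcore(int subcore, bool entering_guest)
        {
                struct sibling_subcore_state *s = local_paca->sibling_subcore_state;

                if (!s)
                        return;         /* KVM not loaded, nothing to coordinate */
                s->in_guest[subcore] = entering_guest ? 1 : 0;
        }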
index 0a57fe6..aa8214f 100644 (file)
@@ -127,18 +127,19 @@ _GLOBAL(csum_partial_copy_generic)
        stw     r7,12(r1)
        stw     r8,8(r1)
 
-       rlwinm  r0,r4,3,0x8
-       rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
-       cmplwi  cr7,r0,0        /* is destination address even ? */
        addic   r12,r6,0
        addi    r6,r4,-4
        neg     r0,r4
        addi    r4,r3,-4
        andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
+       crset   4*cr7+eq
        beq     58f
 
        cmplw   0,r5,r0                 /* is this more than total to do? */
        blt     63f                     /* if not much to do */
+       rlwinm  r7,r6,3,0x8
+       rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+       cmplwi  cr7,r7,0        /* is destination address even? */
        andi.   r8,r0,3                 /* get it word-aligned first */
        mtctr   r8
        beq+    61f
index a4db22f..bb1ffc5 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
index dfdb90c..9f19834 100644 (file)
@@ -113,7 +113,12 @@ BEGIN_FTR_SECTION
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
        b       slb_finish_load_1T
 
-0:
+0:     /*
+        * For userspace addresses, make sure this is region 0.
+        */
+       cmpdi   r9, 0
+       bne     8f
+
        /* when using slices, we extract the psize off the slice bitmaps
         * and then we need to get the sllp encoding off the mmu_psize_defs
         * array.
index 8eb82b0..d93dd4a 100644 (file)
@@ -528,7 +528,6 @@ static struct platform_driver mpc512x_lpbfifo_driver = {
        .remove = mpc512x_lpbfifo_remove,
        .driver = {
                .name = DRV_NAME,
-               .owner = THIS_MODULE,
                .of_match_table = mpc512x_lpbfifo_match,
        },
 };
index dbcd030..63c5ab6 100644 (file)
@@ -222,7 +222,6 @@ static const struct of_device_id mcu_of_match_table[] = {
 static struct i2c_driver mcu_driver = {
        .driver = {
                .name = "mcu-mpc8349emitx",
-               .owner = THIS_MODULE,
                .of_match_table = mcu_of_match_table,
        },
        .probe = mcu_probe,
index dafba10..dfd3100 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/tty.h>
 #include <linux/serial_core.h>
 #include <linux/of_platform.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
index 80804f9..f97bab8 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/pci.h>
 #include <linux/kdev_t.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/seq_file.h>
index 2ee9643..4c82782 100644 (file)
@@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data)
        uint32_t dump_id, dump_size, dump_type;
        struct dump_obj *dump;
        char name[22];
+       struct kobject *kobj;
 
        rc = dump_read_info(&dump_id, &dump_size, &dump_type);
        if (rc != OPAL_SUCCESS)
@@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data)
         * that gracefully and not create two conflicting
         * entries.
         */
-       if (kset_find_obj(dump_kset, name))
+       kobj = kset_find_obj(dump_kset, name);
+       if (kobj) {
+               /* Drop reference added by kset_find_obj() */
+               kobject_put(kobj);
                return 0;
+       }
 
        dump = create_dump_obj(dump_id, dump_size, dump_type);
        if (!dump)
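The API contract being fixed here is easy to miss: kset_find_obj() returns the object with an extra reference held, so even a pure existence probe must kobject_put() the result. A minimal sketch of the lookup-before-create idiom (example_entry_exists is a hypothetical name):

        /* Sketch: probe a kset for an existing entry without leaking a reference. */
        static bool example_entry_exists(struct kset *kset, const char *name)
        {
                struct kobject *kobj = kset_find_obj(kset, name);

                if (!kobj)
                        return false;
                kobject_put(kobj);      /* drop the reference kset_find_obj() took */
                return true;
        }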
index 37f959b..f2344cb 100644 (file)
@@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data)
        uint64_t elog_type;
        int rc;
        char name[2+16+1];
+       struct kobject *kobj;
 
        rc = opal_get_elog_size(&id, &size, &type);
        if (rc != OPAL_SUCCESS) {
@@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data)
         * that gracefully and not create two conflicting
         * entries.
         */
-       if (kset_find_obj(elog_kset, name))
+       kobj = kset_find_obj(elog_kset, name);
+       if (kobj) {
+               /* Drop reference added by kset_find_obj() */
+               kobject_put(kobj);
                return IRQ_HANDLED;
+       }
 
        create_elog_obj(log_id, elog_size, elog_type);
 
index fd9444f..c16d790 100644 (file)
@@ -149,7 +149,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
 
 static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
-       unsigned long pe = phb->ioda.total_pe_num - 1;
+       long pe;
 
        for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
                if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
@@ -162,11 +162,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 {
        struct pnv_phb *phb = pe->phb;
+       unsigned int pe_num = pe->pe_number;
 
        WARN_ON(pe->pdev);
 
        memset(pe, 0, sizeof(struct pnv_ioda_pe));
-       clear_bit(pe->pe_number, phb->ioda.pe_alloc);
+       clear_bit(pe_num, phb->ioda.pe_alloc);
 }
 
 /* The default M64 BAR is shared by all PEs */
@@ -3402,12 +3403,6 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
        struct pnv_phb *phb = pe->phb;
        struct pnv_ioda_pe *slave, *tmp;
 
-       /* Release slave PEs in compound PE */
-       if (pe->flags & PNV_IODA_PE_MASTER) {
-               list_for_each_entry_safe(slave, tmp, &pe->slaves, list)
-                       pnv_ioda_release_pe(slave);
-       }
-
        list_del(&pe->list);
        switch (phb->type) {
        case PNV_PHB_IODA1:
@@ -3422,6 +3417,15 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
 
        pnv_ioda_release_pe_seg(pe);
        pnv_ioda_deconfigure_pe(pe->phb, pe);
+
+       /* Release slave PEs in the compound PE */
+       if (pe->flags & PNV_IODA_PE_MASTER) {
+               list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
+                       list_del(&slave->list);
+                       pnv_ioda_free_pe(slave);
+               }
+       }
+
        pnv_ioda_free_pe(pe);
 }
 
index fe16a50..09eba5a 100644 (file)
@@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 
        bus = bridge->bus;
 
+       /* Rely on the pcibios_free_controller_deferred() callback. */
+       pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+                                       (void *) pci_bus_to_host(bus));
+
        dn = pcibios_get_phb_of_node(bus);
        if (!dn)
                return 0;
index 906dbaa..547fd13 100644 (file)
@@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb)
                release_resource(res);
        }
 
-       /* Free pci_controller data structure */
-       pcibios_free_controller(phb);
+       /*
+        * The pci_controller data structure is freed by
+        * the pcibios_free_controller_deferred() callback;
+        * see pseries_root_bridge_prepare().
+        */
 
        return 0;
 }
index 4ffcaa6..a39d20e 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/root_dev.h>
 #include <linux/of.h>
 #include <linux/of_pci.h>
-#include <linux/kexec.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -66,6 +65,7 @@
 #include <asm/eeh.h>
 #include <asm/reg.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
 
 #include "pseries.h"
 
index 6c11099..81d4947 100644 (file)
@@ -534,7 +534,8 @@ struct cpm1_gpio16_chip {
 
 static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm1_gpio16_chip *cpm1_gc =
+               container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
        struct cpm_ioport16 __iomem *iop = mm_gc->regs;
 
        cpm1_gc->cpdata = in_be16(&iop->dat);
@@ -649,7 +650,8 @@ struct cpm1_gpio32_chip {
 
 static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm1_gpio32_chip *cpm1_gc =
+               container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
        struct cpm_ioport32b __iomem *iop = mm_gc->regs;
 
        cpm1_gc->cpdata = in_be32(&iop->dat);
index 911456d..947f420 100644 (file)
@@ -94,7 +94,8 @@ struct cpm2_gpio32_chip {
 
 static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
+       struct cpm2_gpio32_chip *cpm2_gc =
+               container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
        struct cpm2_ioports __iomem *iop = mm_gc->regs;
 
        cpm2_gc->cpdata = in_be32(&iop->dat);
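These three drivers all swap gpiochip_get_data() for container_of(), presumably because the save_regs() callback can run before the gpiochip's private data pointer is registered, while recovering the wrapper from the embedded member has no such ordering dependency. A generic sketch of the idiom (struct and function names hypothetical):

        /* Sketch: recover a wrapper struct from a pointer to an embedded member. */
        struct example_gpio_chip {
                struct of_mm_gpio_chip mm_gc;   /* embedded, not a pointer */
                u32 cpdata;
        };

        static void example_save_regs(struct of_mm_gpio_chip *mm_gc)
        {
                struct example_gpio_chip *gc =
                        container_of(mm_gc, struct example_gpio_chip, mm_gc);

                gc->cpdata = 0;         /* placeholder for latching hardware state */
        }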
index 68e7c0d..3cc7cac 100644 (file)
@@ -23,7 +23,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
index 57d72f1..9114243 100644 (file)
 
 static void icp_opal_teardown_cpu(void)
 {
-       int cpu = smp_processor_id();
+       int hw_cpu = hard_smp_processor_id();
 
        /* Clear any pending IPI */
-       opal_int_set_mfrr(cpu, 0xff);
+       opal_int_set_mfrr(hw_cpu, 0xff);
 }
 
 static void icp_opal_flush_ipi(void)
@@ -101,14 +101,16 @@ static void icp_opal_eoi(struct irq_data *d)
 
 static void icp_opal_cause_ipi(int cpu, unsigned long data)
 {
-       opal_int_set_mfrr(cpu, IPI_PRIORITY);
+       int hw_cpu = get_hard_smp_processor_id(cpu);
+
+       opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
 }
 
 static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 {
-       int cpu = smp_processor_id();
+       int hw_cpu = hard_smp_processor_id();
 
-       opal_int_set_mfrr(cpu, 0xff);
+       opal_int_set_mfrr(hw_cpu, 0xff);
 
        return smp_ipi_demux();
 }
index e751fe2..c109f07 100644 (file)
@@ -68,7 +68,6 @@ config DEBUG_RODATA
 config S390
        def_bool y
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-       select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_GCOV_PROFILE_ALL
index 26e0c7f..412b1bd 100644 (file)
@@ -602,7 +602,6 @@ CONFIG_FAIL_FUTEX=y
 CONFIG_FAULT_INJECTION_DEBUG_FS=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_IRQSOFF_TRACER=y
 CONFIG_PREEMPT_TRACER=y
 CONFIG_SCHED_TRACER=y
index 24879da..bec279e 100644 (file)
@@ -552,7 +552,6 @@ CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
 CONFIG_PM_NOTIFIER_ERROR_INJECT=m
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 # CONFIG_KPROBE_EVENT is not set
 CONFIG_TRACE_ENUM_MAP_FILE=y
index a5c1e5f..1751446 100644 (file)
@@ -549,7 +549,6 @@ CONFIG_TIMER_STATS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
index 73610f2..2d40ef0 100644 (file)
@@ -172,7 +172,6 @@ CONFIG_DEBUG_NOTIFIERS=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
index 9b49cf1..95aefdb 100644 (file)
@@ -311,6 +311,14 @@ int __get_user_bad(void) __attribute__((noreturn));
 #define __put_user_unaligned __put_user
 #define __get_user_unaligned __get_user
 
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+       WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to:   Destination address, in user space.
@@ -332,12 +340,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
        return __copy_to_user(to, from, n);
 }
 
-void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-__compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
-
 /**
  * copy_from_user: - Copy a block of data from user space.
  * @to:   Destination address, in kernel space.
@@ -362,7 +364,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 
        might_fault();
        if (unlikely(sz != -1 && sz < n)) {
-               copy_from_user_overflow();
+               if (!__builtin_constant_p(n))
+                       copy_user_overflow(sz, n);
+               else
+                       __bad_copy_user();
                return n;
        }
        return __copy_from_user(to, from, n);
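The effect of this uaccess rework, sketched for illustration: a compile-time-constant size that overflows the destination now fails the build via __bad_copy_user(), while a non-constant overflowing size warns at runtime and copies nothing (the caller, uptr and len below are hypothetical):

        /* Hypothetical caller showing both failure modes. */
        static unsigned long example_read(const void __user *uptr, unsigned long len)
        {
                char buf[8];

                copy_from_user(buf, uptr, 16);          /* constant 16 > 8: build error */
                return copy_from_user(buf, uptr, len);  /* len > 8 at runtime: WARN,
                                                         * copies nothing, returns len */
        }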
index ba5f456..7f7ba5f 100644 (file)
@@ -204,11 +204,9 @@ static void __init conmode_default(void)
 #endif
                }
        } else if (MACHINE_IS_KVM) {
-               if (sclp.has_vt220 &&
-                   config_enabled(CONFIG_SCLP_VT220_CONSOLE))
+               if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
                        SET_CONSOLE_VT220;
-               else if (sclp.has_linemode &&
-                        config_enabled(CONFIG_SCLP_CONSOLE))
+               else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
                        SET_CONSOLE_SCLP;
                else
                        SET_CONSOLE_HVC;
index 341a5a1..e722c51 100644 (file)
@@ -249,8 +249,7 @@ unsigned long __copy_user(void __user *to, const void __user *from, unsigned lon
 static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        if (n && __access_ok((unsigned long) to, n)) {
-               if (!__builtin_constant_p(n))
-                       check_object_size(from, n, true);
+               check_object_size(from, n, true);
                return __copy_user(to, (__force void __user *) from, n);
        } else
                return n;
@@ -258,16 +257,14 @@ static inline unsigned long copy_to_user(void __user *to, const void *from, unsi
 
 static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       if (!__builtin_constant_p(n))
-               check_object_size(from, n, true);
+       check_object_size(from, n, true);
        return __copy_user(to, (__force void __user *) from, n);
 }
 
 static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        if (n && __access_ok((unsigned long) from, n)) {
-               if (!__builtin_constant_p(n))
-                       check_object_size(to, n, false);
+               check_object_size(to, n, false);
                return __copy_user((__force void __user *) to, from, n);
        } else
                return n;
index 8bda94f..37a315d 100644 (file)
@@ -212,8 +212,7 @@ copy_from_user(void *to, const void __user *from, unsigned long size)
 {
        unsigned long ret;
 
-       if (!__builtin_constant_p(size))
-               check_object_size(to, size, false);
+       check_object_size(to, size, false);
 
        ret = ___copy_from_user(to, from, size);
        if (unlikely(ret))
@@ -233,8 +232,8 @@ copy_to_user(void __user *to, const void *from, unsigned long size)
 {
        unsigned long ret;
 
-       if (!__builtin_constant_p(size))
-               check_object_size(from, size, true);
+       check_object_size(from, size, true);
+
        ret = ___copy_to_user(to, from, size);
        if (unlikely(ret))
                ret = copy_to_user_fixup(to, from, size);
index 4820a02..78da75b 100644 (file)
@@ -4,7 +4,6 @@
 config TILE
        def_bool y
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-       select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_WANT_FRAME_POINTERS
index 0a9c426..a77369e 100644 (file)
@@ -416,14 +416,13 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
        return n;
 }
 
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-/*
- * There are still unprovable places in the generic code as of 2.6.34, so this
- * option is not really compatible with -Werror, which is more useful in
- * general.
- */
-extern void copy_from_user_overflow(void)
-       __compiletime_warning("copy_from_user() size is not provably correct");
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+       WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
 
 static inline unsigned long __must_check copy_from_user(void *to,
                                          const void __user *from,
@@ -433,14 +432,13 @@ static inline unsigned long __must_check copy_from_user(void *to,
 
        if (likely(sz == -1 || sz >= n))
                n = _copy_from_user(to, from, n);
+       else if (!__builtin_constant_p(n))
+               copy_user_overflow(sz, n);
        else
-               copy_from_user_overflow();
+               __bad_copy_user();
 
        return n;
 }
-#else
-#define copy_from_user _copy_from_user
-#endif
 
 #ifdef __tilegx__
 /**
index 1dd5bd8..1330553 100644 (file)
@@ -81,7 +81,7 @@
   .altinstr_replacement : { *(.altinstr_replacement) }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
+  .exit.text : { EXIT_TEXT }
   .exit.data : { *(.exit.data) }
 
   .preinit_array : {
index ef4b8f9..b783ac8 100644 (file)
@@ -21,21 +21,17 @@ void handle_syscall(struct uml_pt_regs *r)
        PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
 
        if (syscall_trace_enter(regs))
-               return;
+               goto out;
 
        /* Do the seccomp check after ptrace; failures should be fast. */
        if (secure_computing(NULL) == -1)
-               return;
+               goto out;
 
-       /* Update the syscall number after orig_ax has potentially been updated
-        * with ptrace.
-        */
-       UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
        syscall = UPT_SYSCALL_NR(r);
-
        if (syscall >= 0 && syscall <= __NR_syscall_max)
                PT_REGS_SET_SYSCALL_RETURN(regs,
                                EXECUTE_SYSCALL(syscall, regs));
 
+out:
        syscall_trace_leave(regs);
 }
index c580d8c..2a1f0ce 100644 (file)
@@ -24,7 +24,6 @@ config X86
        select ARCH_DISCARD_MEMBLOCK
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-       select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
index 4e2ecfa..4b429df 100644 (file)
@@ -1 +1,3 @@
 CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
index 89fa85e..6f97fb3 100644 (file)
@@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
                        req = cast_mcryptd_ctx_to_req(req_ctx);
                        if (irqs_disabled())
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                        else {
                                local_bh_disable();
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                                local_bh_enable();
                        }
                }
index b691da9..a78a069 100644 (file)
@@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
        vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
        vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-       movl    _args_digest+4*32(state, idx, 4), tmp2_w
+       vmovd   _args_digest(state, idx, 4), %xmm0
        vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
        vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
 
-       vmovdqu %xmm0, _result_digest(job_rax)
-       movl    tmp2_w, _result_digest+1*16(job_rax)
+       vmovdqu %xmm0, _result_digest(job_rax)
+       offset = (_result_digest + 1*16)
+       vmovdqu %xmm1, offset(job_rax)
 
        pop     %rbx
 
index f4cf5b7..d210174 100644 (file)
@@ -497,10 +497,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
                        req = cast_mcryptd_ctx_to_req(req_ctx);
                        if (irqs_disabled())
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                        else {
                                local_bh_disable();
-                               rctx->complete(&req->base, ret);
+                               req_ctx->complete(&req->base, ret);
                                local_bh_enable();
                        }
                }
index a0ae610..e3af86f 100644 (file)
@@ -697,44 +697,15 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from,
 unsigned long __must_check _copy_to_user(void __user *to, const void *from,
                                         unsigned n);
 
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-# define copy_user_diag __compiletime_error
-#else
-# define copy_user_diag __compiletime_warning
-#endif
-
-extern void copy_user_diag("copy_from_user() buffer size is too small")
-copy_from_user_overflow(void);
-extern void copy_user_diag("copy_to_user() buffer size is too small")
-copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
-
-#undef copy_user_diag
-
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-
-extern void
-__compiletime_warning("copy_from_user() buffer size is not provably correct")
-__copy_from_user_overflow(void) __asm__("copy_from_user_overflow");
-#define __copy_from_user_overflow(size, count) __copy_from_user_overflow()
-
-extern void
-__compiletime_warning("copy_to_user() buffer size is not provably correct")
-__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
-#define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
-
-#else
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
 
-static inline void
-__copy_from_user_overflow(int size, unsigned long count)
+static inline void copy_user_overflow(int size, unsigned long count)
 {
        WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
 }
 
-#define __copy_to_user_overflow __copy_from_user_overflow
-
-#endif
-
-static inline unsigned long __must_check
+static __always_inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
        int sz = __compiletime_object_size(to);
@@ -743,36 +714,18 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 
        kasan_check_write(to, n);
 
-       /*
-        * While we would like to have the compiler do the checking for us
-        * even in the non-constant size case, any false positives there are
-        * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even
-        * without - the [hopefully] dangerous looking nature of the warning
-        * would make people go look at the respecitive call sites over and
-        * over again just to find that there's no problem).
-        *
-        * And there are cases where it's just not realistic for the compiler
-        * to prove the count to be in range. For example when multiple call
-        * sites of a helper function - perhaps in different source files -
-        * all doing proper range checking, yet the helper function not doing
-        * so again.
-        *
-        * Therefore limit the compile time checking to the constant size
-        * case, and do only runtime checking for non-constant sizes.
-        */
-
        if (likely(sz < 0 || sz >= n)) {
                check_object_size(to, n, false);
                n = _copy_from_user(to, from, n);
-       } else if (__builtin_constant_p(n))
-               copy_from_user_overflow();
+       } else if (!__builtin_constant_p(n))
+               copy_user_overflow(sz, n);
        else
-               __copy_from_user_overflow(sz, n);
+               __bad_copy_user();
 
        return n;
 }
 
-static inline unsigned long __must_check
+static __always_inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
        int sz = __compiletime_object_size(from);
@@ -781,21 +734,17 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
 
        might_fault();
 
-       /* See the comment in copy_from_user() above. */
        if (likely(sz < 0 || sz >= n)) {
                check_object_size(from, n, true);
                n = _copy_to_user(to, from, n);
-       } else if (__builtin_constant_p(n))
-               copy_to_user_overflow();
+       } else if (!__builtin_constant_p(n))
+               copy_user_overflow(sz, n);
        else
-               __copy_to_user_overflow(sz, n);
+               __bad_copy_user();
 
        return n;
 }
 
-#undef __copy_from_user_overflow
-#undef __copy_to_user_overflow
-
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
index cea4fc1..50c95af 100644 (file)
@@ -1623,6 +1623,9 @@ void __init enable_IR_x2apic(void)
        unsigned long flags;
        int ret, ir_stat;
 
+       if (skip_ioapic_setup)
+               return;
+
        ir_stat = irq_remapping_prepare();
        if (ir_stat < 0 && !x2apic_supported())
                return;
index f5c69d8..b81fe2d 100644 (file)
@@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
                set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
 }
 
+#define MSR_AMD64_DE_CFG       0xC0011029
+
+static void init_amd_ln(struct cpuinfo_x86 *c)
+{
+       /*
+        * Apply erratum 665 fix unconditionally so machines without a BIOS
+        * fix work.
+        */
+       msr_set_bit(MSR_AMD64_DE_CFG, 31);
+}
+
 static void init_amd_bd(struct cpuinfo_x86 *c)
 {
        u64 value;
@@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c)
        case 6:    init_amd_k7(c); break;
        case 0xf:  init_amd_k8(c); break;
        case 0x10: init_amd_gh(c); break;
+       case 0x12: init_amd_ln(c); break;
        case 0x15: init_amd_bd(c); break;
        }
 
index 27a0228..b816971 100644 (file)
@@ -355,6 +355,7 @@ void load_ucode_amd_ap(void)
        unsigned int cpu = smp_processor_id();
        struct equiv_cpu_entry *eq;
        struct microcode_amd *mc;
+       u8 *cont = container;
        u32 rev, eax;
        u16 eq_id;
 
@@ -371,8 +372,11 @@ void load_ucode_amd_ap(void)
        if (check_current_patch_level(&rev, false))
                return;
 
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
        eax = cpuid_eax(0x00000001);
-       eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+       eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
 
        eq_id = find_equiv_id(eq, eax);
        if (!eq_id)
@@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void)
        else
                container = cont_va;
 
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
        eax   = cpuid_eax(0x00000001);
        eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
index ad5bc95..1acfd76 100644 (file)
@@ -56,12 +56,12 @@ asm (".pushsection .entry.text, \"ax\"\n"
      ".popsection");
 
 /* identity function, which can be inlined */
-u32 _paravirt_ident_32(u32 x)
+u32 notrace _paravirt_ident_32(u32 x)
 {
        return x;
 }
 
-u64 _paravirt_ident_64(u64 x)
+u64 notrace _paravirt_ident_64(u64 x)
 {
        return x;
 }
index 2a6e84a..4296beb 100644 (file)
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;
-static unsigned long *logical_package_map  __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
        if (test_and_set_bit(pkg, physical_package_map))
                goto found;
 
-       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-       if (new >= __max_logical_packages) {
+       if (logical_packages_frozen) {
                physical_to_logical_pkg[pkg] = -1;
-               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+               pr_warn("APIC(%x) Package %u exceeds logical package max\n",
                        apicid, pkg);
                return -ENOSPC;
        }
-       set_bit(new, logical_package_map);
+
+       new = logical_packages++;
        pr_info("APIC(%x) Converting physical %u to logical package %u\n",
                apicid, pkg, new);
        physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
        }
 
        __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+       logical_packages = 0;
 
        /*
         * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
        memset(physical_to_logical_pkg, 0xff, size);
        size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
        physical_package_map = kzalloc(size, GFP_KERNEL);
-       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-       logical_package_map = kzalloc(size, GFP_KERNEL);
-
-       pr_info("Max logical packages: %u\n", __max_logical_packages);
 
        for_each_present_cpu(cpu) {
                unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
                set_cpu_possible(cpu, false);
                set_cpu_present(cpu, false);
        }
+
+       if (logical_packages > __max_logical_packages) {
+               pr_warn("Detected more packages (%u) than computed by BIOS data (%u).\n",
+                       logical_packages, __max_logical_packages);
+               logical_packages_frozen = true;
+               __max_logical_packages  = logical_packages;
+       }
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)
index a45d858..5cede40 100644 (file)
@@ -422,6 +422,7 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -435,6 +436,8 @@ struct nested_vmx {
        bool pi_pending;
        u16 posted_intr_nv;
 
+       unsigned long *msr_bitmap;
+
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
 
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2198,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                        new.control) != old.control);
 }
 
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+       vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+       vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        /* Setup TSC multiplier */
        if (kvm_has_tsc_control &&
-           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
-               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-       }
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+               decache_tsc_multiplier(vmx);
 
        vmx_vcpu_pi_load(vcpu, cpu);
        vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
        unsigned long *msr_bitmap;
 
        if (is_guest_mode(vcpu))
-               msr_bitmap = vmx_msr_bitmap_nested;
+               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
        else if (cpu_has_secondary_exec_ctrls() &&
                 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@ static __init int hardware_setup(void)
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
 
-       if (nested) {
-               vmx_msr_bitmap_nested =
-                       (unsigned long *)__get_free_page(GFP_KERNEL);
-               if (!vmx_msr_bitmap_nested)
-                       goto out5;
-       }
-
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
                goto out6;
@@ -6392,8 +6391,6 @@ static __init int hardware_setup(void)
 
        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-       if (nested)
-               memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
@@ -6529,9 +6526,6 @@ out8:
 out7:
        free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@ static __exit void hardware_unsetup(void)
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
        free_page((unsigned long)vmx_vmread_bitmap);
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
 
        free_kvm_area();
 }
@@ -6995,16 +6987,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
+       if (cpu_has_vmx_msr_bitmap()) {
+               vmx->nested.msr_bitmap =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!vmx->nested.msr_bitmap)
+                       goto out_msr_bitmap;
+       }
+
        vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
        if (!vmx->nested.cached_vmcs12)
-               return -ENOMEM;
+               goto out_cached_vmcs12;
 
        if (enable_shadow_vmcs) {
                shadow_vmcs = alloc_vmcs();
-               if (!shadow_vmcs) {
-                       kfree(vmx->nested.cached_vmcs12);
-                       return -ENOMEM;
-               }
+               if (!shadow_vmcs)
+                       goto out_shadow_vmcs;
                /* mark vmcs as shadow */
                shadow_vmcs->revision_id |= (1u << 31);
                /* init shadow vmcs */
@@ -7024,6 +7021,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
        return 1;
+
+out_shadow_vmcs:
+       kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+       free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+       return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7104,10 @@ static void free_nested(struct vcpu_vmx *vmx)
        vmx->nested.vmxon = false;
        free_vpid(vmx->nested.vpid02);
        nested_release_vmcs12(vmx);
+       if (vmx->nested.msr_bitmap) {
+               free_page((unsigned long)vmx->nested.msr_bitmap);
+               vmx->nested.msr_bitmap = NULL;
+       }
        if (enable_shadow_vmcs)
                free_vmcs(vmx->nested.current_shadow_vmcs);
        kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 {
        u32 sec_exec_control;
 
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
        /*
         * There is no point in enabling virtualize x2apic without
         * enabling apicv
@@ -9472,8 +9488,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 {
        int msr;
        struct page *page;
-       unsigned long *msr_bitmap;
+       unsigned long *msr_bitmap_l1;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+       /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;
 
@@ -9482,63 +9500,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                WARN_ON(1);
                return false;
        }
-       msr_bitmap = (unsigned long *)kmap(page);
-       if (!msr_bitmap) {
+       msr_bitmap_l1 = (unsigned long *)kmap(page);
+       if (!msr_bitmap_l1) {
                nested_release_page_clean(page);
                WARN_ON(1);
                return false;
        }
 
+       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
        if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
                if (nested_cpu_has_apic_reg_virt(vmcs12))
                        for (msr = 0x800; msr <= 0x8ff; msr++)
                                nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap,
-                                       vmx_msr_bitmap_nested,
+                                       msr_bitmap_l1, msr_bitmap_l0,
                                        msr, MSR_TYPE_R);
-               /* TPR is allowed */
-               nested_vmx_disable_intercept_for_msr(msr_bitmap,
-                               vmx_msr_bitmap_nested,
+
+               nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_TASKPRI >> 4),
                                MSR_TYPE_R | MSR_TYPE_W);
+
                if (nested_cpu_has_vid(vmcs12)) {
-                       /* EOI and self-IPI are allowed */
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_EOI >> 4),
                                MSR_TYPE_W);
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
                                MSR_TYPE_W);
                }
-       } else {
-               /*
-                * Enable reading intercept of all the x2apic
-                * MSRs. We should not rely on vmcs12 to do any
-                * optimizations here, it may have been modified
-                * by L1.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               msr,
-                               MSR_TYPE_R);
-
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_EOI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-                               MSR_TYPE_W);
        }
        kunmap(page);
        nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        }
 
        if (cpu_has_vmx_msr_bitmap() &&
-           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-               /* MSR_BITMAP will be set by following vmx_set_efer. */
-       } else
+           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+               ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+       else
                exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
        /*
@@ -10011,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
        else
                vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
 
        if (enable_vpid) {
                /*
@@ -10767,6 +10761,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        else
                vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                              PIN_BASED_VMX_PREEMPTION_TIMER);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
+
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;
index ec8654f..bda8d5e 100644 (file)
@@ -77,7 +77,7 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region)
  */
 static inline bool kaslr_memory_enabled(void)
 {
-       return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
+       return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
 }
 
 /* Initialize base and padding for each memory region randomized with KASLR */
index ecb1b69..170cc4f 100644 (file)
@@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma)
 }
 
 /*
- * prot is passed in as a parameter for the new mapping. If the vma has a
- * linear pfn mapping for the entire range reserve the entire vma range with
- * single reserve_pfn_range call.
+ * prot is passed in as a parameter for the new mapping. If the vma has
+ * a linear pfn mapping for the entire range, or no vma is provided,
+ * reserve the entire pfn + size range with a single
+ * reserve_pfn_range call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
                    unsigned long pfn, unsigned long addr, unsigned long size)
@@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
        enum page_cache_mode pcm;
 
        /* reserve the whole chunk starting from paddr */
-       if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+       if (!vma || (addr == vma->vm_start
+                               && size == (vma->vm_end - vma->vm_start))) {
                int ret;
 
                ret = reserve_pfn_range(paddr, size, prot, 0);
-               if (!ret)
+               if (ret == 0 && vma)
                        vma->vm_flags |= VM_PAT;
                return ret;
        }
@@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
        resource_size_t paddr;
        unsigned long prot;
 
-       if (!(vma->vm_flags & VM_PAT))
+       if (vma && !(vma->vm_flags & VM_PAT))
                return;
 
        /* free the chunk starting from pfn or the whole chunk */
@@ -1011,7 +1013,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
                size = vma->vm_end - vma->vm_start;
        }
        free_pfn_range(paddr, size);
-       vma->vm_flags &= ~VM_PAT;
+       if (vma)
+               vma->vm_flags &= ~VM_PAT;
 }
 
 /*
index b814ca6..7948be3 100644 (file)
@@ -41,6 +41,7 @@ static DEFINE_RAW_SPINLOCK(list_lock);
  * @node:      list item for parent traversal.
  * @rcu:       RCU callback item for freeing.
  * @irq:       back pointer to parent.
+ * @enabled:   true if the driver has enabled the IRQ
  * @virq:      the virtual IRQ value provided to the requesting driver.
  *
  * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
@@ -50,6 +51,7 @@ struct vmd_irq {
        struct list_head        node;
        struct rcu_head         rcu;
        struct vmd_irq_list     *irq;
+       bool                    enabled;
        unsigned int            virq;
 };
 
@@ -122,7 +124,9 @@ static void vmd_irq_enable(struct irq_data *data)
        unsigned long flags;
 
        raw_spin_lock_irqsave(&list_lock, flags);
+       WARN_ON(vmdirq->enabled);
        list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
+       vmdirq->enabled = true;
        raw_spin_unlock_irqrestore(&list_lock, flags);
 
        data->chip->irq_unmask(data);
@@ -136,8 +140,10 @@ static void vmd_irq_disable(struct irq_data *data)
        data->chip->irq_mask(data);
 
        raw_spin_lock_irqsave(&list_lock, flags);
-       list_del_rcu(&vmdirq->node);
-       INIT_LIST_HEAD_RCU(&vmdirq->node);
+       if (vmdirq->enabled) {
+               list_del_rcu(&vmdirq->node);
+               vmdirq->enabled = false;
+       }
        raw_spin_unlock_irqrestore(&list_lock, flags);
 }
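What the new enabled flag buys: vmd_irq_disable() can be reached for a descriptor that is not currently on the list, and list_del_rcu() on an unlinked node corrupts it; tracking the linked state under the same lock makes disable idempotent. The same guard, sketched on its own (example_detach is a hypothetical name):

        /* Sketch: idempotent unlink guarded by a state flag under one lock. */
        static void example_detach(struct vmd_irq *vmdirq)
        {
                unsigned long flags;

                raw_spin_lock_irqsave(&list_lock, flags);
                if (vmdirq->enabled) {          /* only unlink if actually linked */
                        list_del_rcu(&vmdirq->node);
                        vmdirq->enabled = false;
                }
                raw_spin_unlock_irqrestore(&list_lock, flags);
        }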
 
index a3e3ccc..9634557 100644 (file)
@@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void)
                        return result;
        }
 
-       temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
+       temp_level4_pgt = __pa(pgd);
        return 0;
 }
 
index ebd4dd6..a7ef7b1 100644 (file)
@@ -84,7 +84,10 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
        case EAX:
        case EIP:
        case UESP:
+               break;
        case ORIG_EAX:
+               /* Update the syscall number. */
+               UPT_SYSCALL_NR(&child->thread.regs.regs) = value;
                break;
        case FS:
                if (value && (value & 3) != 3)
index faab418..0b5c184 100644 (file)
@@ -78,7 +78,11 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
        case RSI:
        case RDI:
        case RBP:
+               break;
+
        case ORIG_RAX:
+               /* Update the syscall number. */
+               UPT_SYSCALL_NR(&child->thread.regs.regs) = value;
                break;
 
        case FS:
index 8ffb089..b86ebb1 100644 (file)
@@ -118,7 +118,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
index f394775..aa73540 100644 (file)
@@ -667,18 +667,19 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
 
-       if (bio_op(bio) == REQ_OP_DISCARD)
-               goto integrity_clone;
-
-       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
+               break;
+       case REQ_OP_WRITE_SAME:
                bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
-               goto integrity_clone;
+               break;
+       default:
+               bio_for_each_segment(bv, bio_src, iter)
+                       bio->bi_io_vec[bio->bi_vcnt++] = bv;
+               break;
        }
 
-       bio_for_each_segment(bv, bio_src, iter)
-               bio->bi_io_vec[bio->bi_vcnt++] = bv;
-
-integrity_clone:
        if (bio_integrity(bio_src)) {
                int ret;
 
@@ -1788,7 +1789,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
         * Discards need a mutable bio_vec to accommodate the payload
         * required by the DSM TRIM and UNMAP commands.
         */
-       if (bio_op(bio) == REQ_OP_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
                split = bio_clone_bioset(bio, gfp, bs);
        else
                split = bio_clone_fast(bio, gfp, bs);
index 999442e..36c7ac3 100644 (file)
@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
 void blk_set_queue_dying(struct request_queue *q)
 {
-       queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+       spin_lock_irq(q->queue_lock);
+       queue_flag_set(QUEUE_FLAG_DYING, q);
+       spin_unlock_irq(q->queue_lock);
 
        if (q->mq_ops)
                blk_mq_wake_waiters(q);
index 3eec75a..2642e5f 100644 (file)
@@ -94,8 +94,30 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
        bool do_split = true;
        struct bio *new = NULL;
        const unsigned max_sectors = get_max_io_size(q, bio);
+       unsigned bvecs = 0;
 
        bio_for_each_segment(bv, bio, iter) {
+               /*
+                * With arbitrary bio size, the incoming bio may be very
+                * big. We have to split the bio into small bios so that
+                * each holds at most BIO_MAX_PAGES bvecs because
+                * bio_clone() can fail to allocate big bvecs.
+                *
+                * It would be better to apply the limit only to request
+                * queues in which bio_clone() is involved, instead of
+                * globally. The biggest blocker is the bio_clone() in
+                * bio bounce.
+                *
+                * If a bio is split for this reason, we should be
+                * allowed to continue merging bios, but we don't do
+                * that for now, to keep the change simple.
+                *
+                * TODO: deal with bio bounce's bio_clone() gracefully
+                * and convert the global limit into per-queue limit.
+                */
+               if (bvecs++ >= BIO_MAX_PAGES)
+                       goto split;
+
                /*
                 * If the queue doesn't support SG gaps and adding this
                 * offset would create a gap, disallow it.
@@ -172,12 +194,18 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
        struct bio *split, *res;
        unsigned nsegs;
 
-       if (bio_op(*bio) == REQ_OP_DISCARD)
+       switch (bio_op(*bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
                split = blk_bio_discard_split(q, *bio, bs, &nsegs);
-       else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
+               break;
+       case REQ_OP_WRITE_SAME:
                split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
-       else
+               break;
+       default:
                split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+               break;
+       }
 
        /* physical segments can be figured out during splitting */
        res = split ? split : *bio;
@@ -213,7 +241,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
         * This should probably be returning 0, but blk_add_request_payload()
         * (Christoph!!!!)
         */
-       if (bio_op(bio) == REQ_OP_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
                return 1;
 
        if (bio_op(bio) == REQ_OP_WRITE_SAME)
@@ -385,7 +413,9 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
        nsegs = 0;
        cluster = blk_queue_cluster(q);
 
-       if (bio_op(bio) == REQ_OP_DISCARD) {
+       switch (bio_op(bio)) {
+       case REQ_OP_DISCARD:
+       case REQ_OP_SECURE_ERASE:
                /*
                 * This is a hack - drivers should be neither modifying the
                 * biovec, nor relying on bi_vcnt - but because of
@@ -393,19 +423,16 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
                 * a payload we need to set up here (thank you Christoph) and
                 * bi_vcnt is really the only way of telling if we need to.
                 */
-
-               if (bio->bi_vcnt)
-                       goto single_segment;
-
-               return 0;
-       }
-
-       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
-single_segment:
+               if (!bio->bi_vcnt)
+                       return 0;
+               /* Fall through */
+       case REQ_OP_WRITE_SAME:
                *sg = sglist;
                bvec = bio_iovec(bio);
                sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                return 1;
+       default:
+               break;
        }
 
        for_each_bio(bio)
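
Recasting the op checks as a switch lets the payload-carrying discard case share the WRITE SAME single-segment mapping through an explicit fall-through instead of a goto into another branch. The resulting control flow in isolation, with illustrative op names and arguments:

    enum op { OP_DISCARD, OP_SECURE_ERASE, OP_WRITE_SAME, OP_RW };

    /* Number of scatterlist entries an operation maps to: payload-less
     * discards map to none, payload-carrying special ops to exactly
     * one, and ordinary reads/writes to one per data segment. */
    static int nr_sg_entries(enum op op, int has_payload, int nr_segments)
    {
            switch (op) {
            case OP_DISCARD:
            case OP_SECURE_ERASE:
                    if (!has_payload)
                            return 0;
                    /* fall through: the payload is a single segment */
            case OP_WRITE_SAME:
                    return 1;
            default:
                    return nr_segments;
            }
    }
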
index e931a0e..13f5a6c 100644 (file)
@@ -793,11 +793,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
        struct list_head *dptr;
        int queued;
 
-       WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
        if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
                return;
 
+       WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+               cpu_online(hctx->next_cpu));
+
        hctx->run++;
 
        /*
@@ -1036,10 +1037,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
-                                           struct blk_mq_ctx *ctx,
                                            struct request *rq,
                                            bool at_head)
 {
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
+
        trace_block_rq_insert(hctx->queue, rq);
 
        if (at_head)
@@ -1053,20 +1055,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
 
-       __blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+       __blk_mq_insert_req_list(hctx, rq, at_head);
        blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
-               bool async)
+                          bool async)
 {
+       struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct request_queue *q = rq->q;
        struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
-       current_ctx = blk_mq_get_ctx(q);
-       if (!cpu_online(ctx->cpu))
-               rq->mq_ctx = ctx = current_ctx;
 
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
@@ -1076,8 +1074,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
        if (run_queue)
                blk_mq_run_hw_queue(hctx, async);
-
-       blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -1088,14 +1084,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
 
 {
        struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *current_ctx;
 
        trace_block_unplug(q, depth, !from_schedule);
 
-       current_ctx = blk_mq_get_ctx(q);
-
-       if (!cpu_online(ctx->cpu))
-               ctx = current_ctx;
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
        /*
@@ -1107,15 +1098,14 @@ static void blk_mq_insert_requests(struct request_queue *q,
                struct request *rq;
 
                rq = list_first_entry(list, struct request, queuelist);
+               BUG_ON(rq->mq_ctx != ctx);
                list_del_init(&rq->queuelist);
-               rq->mq_ctx = ctx;
-               __blk_mq_insert_req_list(hctx, ctx, rq, false);
+               __blk_mq_insert_req_list(hctx, rq, false);
        }
        blk_mq_hctx_mark_pending(hctx, ctx);
        spin_unlock(&ctx->lock);
 
        blk_mq_run_hw_queue(hctx, from_schedule);
-       blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1630,16 +1620,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
        return 0;
 }
 
+/*
+ * 'cpu' is going away. Splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
 static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 {
-       struct request_queue *q = hctx->queue;
        struct blk_mq_ctx *ctx;
        LIST_HEAD(tmp);
 
-       /*
-        * Move ctx entries to new CPU, if this one is going away.
-        */
-       ctx = __blk_mq_get_ctx(q, cpu);
+       ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
        spin_lock(&ctx->lock);
        if (!list_empty(&ctx->rq_list)) {
@@ -1651,24 +1642,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
        if (list_empty(&tmp))
                return NOTIFY_OK;
 
-       ctx = blk_mq_get_ctx(q);
-       spin_lock(&ctx->lock);
-
-       while (!list_empty(&tmp)) {
-               struct request *rq;
-
-               rq = list_first_entry(&tmp, struct request, queuelist);
-               rq->mq_ctx = ctx;
-               list_move_tail(&rq->queuelist, &ctx->rq_list);
-       }
-
-       hctx = q->mq_ops->map_queue(q, ctx->cpu);
-       blk_mq_hctx_mark_pending(hctx, ctx);
-
-       spin_unlock(&ctx->lock);
+       spin_lock(&hctx->lock);
+       list_splice_tail_init(&tmp, &hctx->dispatch);
+       spin_unlock(&hctx->lock);
 
        blk_mq_run_hw_queue(hctx, true);
-       blk_mq_put_ctx(ctx);
        return NOTIFY_OK;
 }
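
Rather than retagging each request with a live software context, the rework moves the dead CPU's whole pending list onto the hardware queue's dispatch list in one O(1) splice under the destination lock. The same move in plain C, using a hand-rolled singly linked list with a tail pointer (illustrative types, not the kernel list API):

    #include <pthread.h>
    #include <stddef.h>

    struct req { struct req *next; };
    struct req_list { struct req *head, **tail; };

    static struct req_list dispatch = { NULL, &dispatch.head };
    static pthread_mutex_t dispatch_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Move every entry of 'src' to the tail of 'dispatch' in O(1) and
     * reinitialize 'src': the analogue of list_splice_tail_init()
     * under hctx->lock in the hunk above. */
    static void splice_to_dispatch(struct req_list *src)
    {
            if (!src->head)
                    return;

            pthread_mutex_lock(&dispatch_lock);
            *dispatch.tail = src->head;
            dispatch.tail = src->tail;
            pthread_mutex_unlock(&dispatch_lock);

            src->head = NULL;
            src->tail = &src->head;
    }
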
 
index 7096c22..f7d973a 100644 (file)
@@ -366,7 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);
 
-               if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
+               if (req_op(rq) != req_op(pos))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
index cf8037a..77207b4 100644 (file)
@@ -733,13 +733,14 @@ static void cryptd_aead_crypt(struct aead_request *req,
        rctx = aead_request_ctx(req);
        compl = rctx->complete;
 
+       tfm = crypto_aead_reqtfm(req);
+
        if (unlikely(err == -EINPROGRESS))
                goto out;
        aead_request_set_tfm(req, child);
        err = crypt( req );
 
 out:
-       tfm = crypto_aead_reqtfm(req);
        ctx = crypto_aead_ctx(tfm);
        refcnt = atomic_read(&ctx->refcnt);
 
index 4c745bf..161f915 100644 (file)
@@ -42,7 +42,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
                list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
                        struct acpi_nfit_system_address *spa = nfit_spa->spa;
 
-                       if (nfit_spa_type(spa) == NFIT_SPA_PM)
+                       if (nfit_spa_type(spa) != NFIT_SPA_PM)
                                continue;
                        /* find the spa that covers the mce addr */
                        if (spa->address > mce->addr)
index ad9fc84..e878fc7 100644 (file)
@@ -2054,7 +2054,7 @@ int __init acpi_scan_init(void)
 
 static struct acpi_probe_entry *ape;
 static int acpi_probe_count;
-static DEFINE_SPINLOCK(acpi_probe_lock);
+static DEFINE_MUTEX(acpi_probe_mutex);
 
 static int __init acpi_match_madt(struct acpi_subtable_header *header,
                                  const unsigned long end)
@@ -2073,7 +2073,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr)
        if (acpi_disabled)
                return 0;
 
-       spin_lock(&acpi_probe_lock);
+       mutex_lock(&acpi_probe_mutex);
        for (ape = ap_head; nr; ape++, nr--) {
                if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) {
                        acpi_probe_count = 0;
@@ -2086,7 +2086,7 @@ int __init __acpi_probe_device_table(struct acpi_probe_entry *ap_head, int nr)
                                count++;
                }
        }
-       spin_unlock(&acpi_probe_lock);
+       mutex_unlock(&acpi_probe_mutex);
 
        return count;
 }
index 7461a58..dcf2c72 100644 (file)
@@ -2524,7 +2524,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host,
 
                /* Do not receive interrupts sent by dummy ports */
                if (!pp) {
-                       disable_irq(irq + i);
+                       disable_irq(irq);
                        continue;
                }
 
index 633aa29..44f97ad 100644 (file)
@@ -144,7 +144,7 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id)
        ap->ioaddr.altstatus_addr = base + 0x1E;
        ap->ioaddr.bmdma_addr = base;
        ata_sff_std_ports(&ap->ioaddr);
-       ap->pflags = ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
+       ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
 
        ninja32_program(base);
        /* FIXME: Should we disable them at remove ? */
index 6339efd..f2aaf9e 100644 (file)
@@ -1845,8 +1845,9 @@ static int eni_start(struct atm_dev *dev)
        /* initialize memory management */
        buffer_mem = eni_dev->mem - (buf - eni_dev->ram);
        eni_dev->free_list_size = buffer_mem/MID_MIN_BUF_SIZE/2;
-       eni_dev->free_list = kmalloc(
-           sizeof(struct eni_free)*(eni_dev->free_list_size+1),GFP_KERNEL);
+       eni_dev->free_list = kmalloc_array(eni_dev->free_list_size + 1,
+                                          sizeof(*eni_dev->free_list),
+                                          GFP_KERNEL);
        if (!eni_dev->free_list) {
                printk(KERN_ERR DEV_LABEL "(itf %d): couldn't get free page\n",
                    dev->number);
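
This and the following allocations switch to kmalloc_array(), which fails the request when count * size would overflow instead of wrapping and returning an undersized buffer. A userspace analogue of that guard, assuming only libc:

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Userspace analogue of kmalloc_array(): refuse the request when
     * n * size would overflow size_t rather than silently returning an
     * undersized buffer that later writes run off the end of. */
    static void *alloc_array(size_t n, size_t size)
    {
            if (size && n > SIZE_MAX / size) {
                    errno = ENOMEM;
                    return NULL;
            }
            return malloc(n * size);
    }
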
index 75dde90..81aaa50 100644 (file)
@@ -2489,7 +2489,7 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e)
 {
     const struct firmware *firmware;
     struct device *device;
-    struct fw_header *fw_header;
+    const struct fw_header *fw_header;
     const __le32 *fw_data;
     u32 fw_size;
     u32 __iomem *load_addr;
@@ -2511,9 +2511,9 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e)
        return err;
     }
 
-    fw_data = (__le32 *) firmware->data;
+    fw_data = (const __le32 *)firmware->data;
     fw_size = firmware->size / sizeof(u32);
-    fw_header = (struct fw_header *) firmware->data;
+    fw_header = (const struct fw_header *)firmware->data;
     load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset);
 
     DPRINTK(2, "device %s firmware being loaded at 0x%p (%d words)\n",
index 0f5cb37..31b513a 100644 (file)
@@ -779,8 +779,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
                  G0_RBPS_BS + (group * 32));
 
        /* bitmap table */
-       he_dev->rbpl_table = kmalloc(BITS_TO_LONGS(RBPL_TABLE_SIZE)
-                                    * sizeof(unsigned long), GFP_KERNEL);
+       he_dev->rbpl_table = kmalloc_array(BITS_TO_LONGS(RBPL_TABLE_SIZE),
+                                          sizeof(*he_dev->rbpl_table),
+                                          GFP_KERNEL);
        if (!he_dev->rbpl_table) {
                hprintk("unable to allocate rbpl bitmap table\n");
                return -ENOMEM;
@@ -788,8 +789,9 @@ static int he_init_group(struct he_dev *he_dev, int group)
        bitmap_zero(he_dev->rbpl_table, RBPL_TABLE_SIZE);
 
        /* rbpl_virt 64-bit pointers */
-       he_dev->rbpl_virt = kmalloc(RBPL_TABLE_SIZE
-                                   * sizeof(struct he_buff *), GFP_KERNEL);
+       he_dev->rbpl_virt = kmalloc_array(RBPL_TABLE_SIZE,
+                                         sizeof(*he_dev->rbpl_virt),
+                                         GFP_KERNEL);
        if (!he_dev->rbpl_virt) {
                hprintk("unable to allocate rbpl virt table\n");
                goto out_free_rbpl_table;
index 809dd1e..b275676 100644 (file)
@@ -1885,9 +1885,9 @@ static int open_tx(struct atm_vcc *vcc)
                 if ((ret = ia_cbr_setup (iadev, vcc)) < 0) {     
                     return ret;
                 }
-       } 
-       else  
-           printk("iadev:  Non UBR, ABR and CBR traffic not supportedn"); 
+       } else {
+               printk("iadev:  Non UBR, ABR and CBR traffic not supported\n");
+       }
         
         iadev->testTable[vcc->vci]->vc_status |= VC_ACTIVE;
        IF_EVENT(printk("ia open_tx returning \n");)  
@@ -1975,7 +1975,9 @@ static int tx_init(struct atm_dev *dev)
                buf_desc_ptr++;           
                tx_pkt_start += iadev->tx_buf_sz;  
        }  
-        iadev->tx_buf = kmalloc(iadev->num_tx_desc*sizeof(struct cpcs_trailer_desc), GFP_KERNEL);
+       iadev->tx_buf = kmalloc_array(iadev->num_tx_desc,
+                                     sizeof(*iadev->tx_buf),
+                                     GFP_KERNEL);
         if (!iadev->tx_buf) {
             printk(KERN_ERR DEV_LABEL " couldn't get mem\n");
            goto err_free_dle;
@@ -1995,8 +1997,9 @@ static int tx_init(struct atm_dev *dev)
                                                       sizeof(*cpcs),
                                                       DMA_TO_DEVICE);
         }
-        iadev->desc_tbl = kmalloc(iadev->num_tx_desc *
-                                   sizeof(struct desc_tbl_t), GFP_KERNEL);
+       iadev->desc_tbl = kmalloc_array(iadev->num_tx_desc,
+                                       sizeof(*iadev->desc_tbl),
+                                       GFP_KERNEL);
        if (!iadev->desc_tbl) {
                printk(KERN_ERR DEV_LABEL " couldn't get mem\n");
                goto err_free_all_tx_bufs;
@@ -2124,7 +2127,9 @@ static int tx_init(struct atm_dev *dev)
        memset((caddr_t)(iadev->seg_ram+i),  0, iadev->num_vc*4);
        vc = (struct main_vc *)iadev->MAIN_VC_TABLE_ADDR;  
        evc = (struct ext_vc *)iadev->EXT_VC_TABLE_ADDR;  
-        iadev->testTable = kmalloc(sizeof(long)*iadev->num_vc, GFP_KERNEL); 
+       iadev->testTable = kmalloc_array(iadev->num_vc,
+                                        sizeof(*iadev->testTable),
+                                        GFP_KERNEL);
         if (!iadev->testTable) {
            printk("Get freepage  failed\n");
           goto err_free_desc_tbl;
index 700ed15..c7296b5 100644 (file)
@@ -370,7 +370,8 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
                return error;
         }
 
-       if ((card = kmalloc(sizeof(ns_dev), GFP_KERNEL)) == NULL) {
+       card = kmalloc(sizeof(*card), GFP_KERNEL);
+       if (!card) {
                printk
                    ("nicstar%d: can't allocate memory for device structure.\n",
                     i);
@@ -611,7 +612,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev)
        for (j = 0; j < card->rct_size; j++)
                ns_write_sram(card, j * 4, u32d, 4);
 
-       memset(card->vcmap, 0, NS_MAX_RCTSIZE * sizeof(vc_map));
+       memset(card->vcmap, 0, sizeof(card->vcmap));
 
        for (j = 0; j < NS_FRSCD_NUM; j++)
                card->scd2vc[j] = NULL;
@@ -862,7 +863,7 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd)
        if (size != VBR_SCQSIZE && size != CBR_SCQSIZE)
                return NULL;
 
-       scq = kmalloc(sizeof(scq_info), GFP_KERNEL);
+       scq = kmalloc(sizeof(*scq), GFP_KERNEL);
        if (!scq)
                return NULL;
         scq->org = dma_alloc_coherent(&card->pcidev->dev,
@@ -871,8 +872,9 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd)
                kfree(scq);
                return NULL;
        }
-       scq->skb = kmalloc(sizeof(struct sk_buff *) *
-                          (size / NS_SCQE_SIZE), GFP_KERNEL);
+       scq->skb = kmalloc_array(size / NS_SCQE_SIZE,
+                                sizeof(*scq->skb),
+                                GFP_KERNEL);
        if (!scq->skb) {
                dma_free_coherent(&card->pcidev->dev,
                                  2 * size, scq->org, scq->dma);
@@ -2021,7 +2023,8 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe)
 
                cell = skb->data;
                for (i = ns_rsqe_cellcount(rsqe); i; i--) {
-                       if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) {
+                       sb = dev_alloc_skb(NS_SMSKBSIZE);
+                       if (!sb) {
                                printk
                                    ("nicstar%d: Can't allocate buffers for aal0.\n",
                                     card->index);
index cecfb94..d3dc954 100644 (file)
@@ -598,12 +598,13 @@ static void close_rx(struct atm_vcc *vcc)
 static int start_rx(struct atm_dev *dev)
 {
        struct zatm_dev *zatm_dev;
-       int size,i;
+       int i;
 
-DPRINTK("start_rx\n");
+       DPRINTK("start_rx\n");
        zatm_dev = ZATM_DEV(dev);
-       size = sizeof(struct atm_vcc *)*zatm_dev->chans;
-       zatm_dev->rx_map =  kzalloc(size,GFP_KERNEL);
+       zatm_dev->rx_map = kcalloc(zatm_dev->chans,
+                                  sizeof(*zatm_dev->rx_map),
+                                  GFP_KERNEL);
        if (!zatm_dev->rx_map) return -ENOMEM;
        /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */
        zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR);
@@ -998,8 +999,9 @@ static int start_tx(struct atm_dev *dev)
 
        DPRINTK("start_tx\n");
        zatm_dev = ZATM_DEV(dev);
-       zatm_dev->tx_map = kmalloc(sizeof(struct atm_vcc *)*
-           zatm_dev->chans,GFP_KERNEL);
+       zatm_dev->tx_map = kmalloc_array(zatm_dev->chans,
+                                        sizeof(*zatm_dev->tx_map),
+                                        GFP_KERNEL);
        if (!zatm_dev->tx_map) return -ENOMEM;
        zatm_dev->tx_bw = ATM_OC3_PCR;
        zatm_dev->free_shapers = (1 << NR_SHAPERS)-1;
@@ -1398,7 +1400,7 @@ static int zatm_open(struct atm_vcc *vcc)
        DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi,
            vcc->vci);
        if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) {
-               zatm_vcc = kmalloc(sizeof(struct zatm_vcc),GFP_KERNEL);
+               zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL);
                if (!zatm_vcc) {
                        clear_bit(ATM_VF_ADDR,&vcc->flags);
                        return -ENOMEM;
index e097d35..17995fa 100644 (file)
@@ -301,7 +301,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
        int (*callback)(struct device *);
        int retval;
 
-       trace_rpm_idle(dev, rpmflags);
+       trace_rpm_idle_rcuidle(dev, rpmflags);
        retval = rpm_check_suspend_allowed(dev);
        if (retval < 0)
                ;       /* Conditions are wrong. */
@@ -337,7 +337,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
                        dev->power.request_pending = true;
                        queue_work(pm_wq, &dev->power.work);
                }
-               trace_rpm_return_int(dev, _THIS_IP_, 0);
+               trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
                return 0;
        }
 
@@ -352,7 +352,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
        wake_up_all(&dev->power.wait_queue);
 
  out:
-       trace_rpm_return_int(dev, _THIS_IP_, retval);
+       trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
        return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
 }
 
@@ -601,7 +601,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
        struct device *parent = NULL;
        int retval = 0;
 
-       trace_rpm_resume(dev, rpmflags);
+       trace_rpm_resume_rcuidle(dev, rpmflags);
 
  repeat:
        if (dev->power.runtime_error)
@@ -764,7 +764,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
                spin_lock_irq(&dev->power.lock);
        }
 
-       trace_rpm_return_int(dev, _THIS_IP_, retval);
+       trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
        return retval;
 }
index aa56af8..b11af3f 100644 (file)
@@ -404,6 +404,7 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
                unsigned int new_base_reg, new_top_reg;
                unsigned int min, max;
                unsigned int max_dist;
+               unsigned int dist, best_dist = UINT_MAX;
 
                max_dist = map->reg_stride * sizeof(*rbnode_tmp) /
                        map->cache_word_size;
@@ -423,24 +424,41 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
                                &base_reg, &top_reg);
 
                        if (base_reg <= max && top_reg >= min) {
-                               new_base_reg = min(reg, base_reg);
-                               new_top_reg = max(reg, top_reg);
-                       } else {
-                               if (max < base_reg)
-                                       node = node->rb_left;
+                               if (reg < base_reg)
+                                       dist = base_reg - reg;
+                               else if (reg > top_reg)
+                                       dist = reg - top_reg;
                                else
-                                       node = node->rb_right;
-
-                               continue;
+                                       dist = 0;
+                               if (dist < best_dist) {
+                                       rbnode = rbnode_tmp;
+                                       best_dist = dist;
+                                       new_base_reg = min(reg, base_reg);
+                                       new_top_reg = max(reg, top_reg);
+                               }
                        }
 
-                       ret = regcache_rbtree_insert_to_block(map, rbnode_tmp,
+                       /*
+                        * Keep looking, we want to choose the closest block,
+                        * otherwise we might end up creating overlapping
+                        * blocks, which breaks the rbtree.
+                        */
+                       if (reg < base_reg)
+                               node = node->rb_left;
+                       else if (reg > top_reg)
+                               node = node->rb_right;
+                       else
+                               break;
+               }
+
+               if (rbnode) {
+                       ret = regcache_rbtree_insert_to_block(map, rbnode,
                                                              new_base_reg,
                                                              new_top_reg, reg,
                                                              value);
                        if (ret)
                                return ret;
-                       rbtree_ctx->cached_rbnode = rbnode_tmp;
+                       rbtree_ctx->cached_rbnode = rbnode;
                        return 0;
                }
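
The rewrite no longer extends the first block whose range merely overlaps the adjacency window; it keeps walking and picks the block closest to the new register, since extending a farther block can create overlapping ranges and corrupt the rbtree. The distance metric it relies on, in isolation:

    /* Distance from a register number to the closed range [base, top]:
     * zero when it falls inside, otherwise the gap to the nearer edge.
     * The loop above minimizes this over all candidate blocks. */
    static unsigned int range_dist(unsigned int reg,
                                   unsigned int base, unsigned int top)
    {
            if (reg < base)
                    return base - reg;
            if (reg > top)
                    return reg - top;
            return 0;
    }
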
 
index df7ff72..4e58256 100644 (file)
@@ -38,10 +38,11 @@ static int regcache_hw_init(struct regmap *map)
 
        /* calculate the size of reg_defaults */
        for (count = 0, i = 0; i < map->num_reg_defaults_raw; i++)
-               if (!regmap_volatile(map, i * map->reg_stride))
+               if (regmap_readable(map, i * map->reg_stride) &&
+                   !regmap_volatile(map, i * map->reg_stride))
                        count++;
 
-       /* all registers are volatile, so just bypass */
+       /* all registers are unreadable or volatile, so just bypass */
        if (!count) {
                map->cache_bypass = true;
                return 0;
index 51fa7d6..25d26bb 100644 (file)
@@ -1474,6 +1474,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
                ret = map->bus->write(map->bus_context, buf, len);
 
                kfree(buf);
+       } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
+               regcache_drop_region(map, reg, reg + 1);
        }
 
        trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
index b71a9c7..e3d8e4c 100644 (file)
@@ -3706,22 +3706,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
        if (UFDCS->rawcmd == 1)
                UFDCS->rawcmd = 2;
 
-       if (mode & (FMODE_READ|FMODE_WRITE)) {
-               UDRS->last_checked = 0;
-               clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-               check_disk_change(bdev);
-               if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-                       goto out;
-               if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+       if (!(mode & FMODE_NDELAY)) {
+               if (mode & (FMODE_READ|FMODE_WRITE)) {
+                       UDRS->last_checked = 0;
+                       clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+                       check_disk_change(bdev);
+                       if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+                               goto out;
+                       if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+                               goto out;
+               }
+               res = -EROFS;
+               if ((mode & FMODE_WRITE) &&
+                   !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
                        goto out;
        }
-
-       res = -EROFS;
-
-       if ((mode & FMODE_WRITE) &&
-                       !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-               goto out;
-
        mutex_unlock(&open_lock);
        mutex_unlock(&floppy_mutex);
        return 0;
index be4fea6..88ef6d4 100644 (file)
@@ -189,6 +189,8 @@ struct blkfront_info
        struct mutex mutex;
        struct xenbus_device *xbdev;
        struct gendisk *gd;
+       u16 sector_size;
+       unsigned int physical_sector_size;
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
@@ -910,9 +912,45 @@ static struct blk_mq_ops blkfront_mq_ops = {
        .map_queue = blk_mq_map_queue,
 };
 
+static void blkif_set_queue_limits(struct blkfront_info *info)
+{
+       struct request_queue *rq = info->rq;
+       struct gendisk *gd = info->gd;
+       unsigned int segments = info->max_indirect_segments ? :
+                               BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+       queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+       if (info->feature_discard) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+               blk_queue_max_discard_sectors(rq, get_capacity(gd));
+               rq->limits.discard_granularity = info->discard_granularity;
+               rq->limits.discard_alignment = info->discard_alignment;
+               if (info->feature_secdiscard)
+                       queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
+       }
+
+       /* Hard sector size and max sectors impersonate the equiv. hardware. */
+       blk_queue_logical_block_size(rq, info->sector_size);
+       blk_queue_physical_block_size(rq, info->physical_sector_size);
+       blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+
+       /* Each segment in a request is up to an aligned page in size. */
+       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+       /* Ensure a merged request will fit in a single I/O ring slot. */
+       blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+
+       /* Make sure buffer addresses are sector-aligned. */
+       blk_queue_dma_alignment(rq, 511);
+
+       /* Make sure we don't use bounce buffers. */
+       blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-                               unsigned int physical_sector_size,
-                               unsigned int segments)
+                               unsigned int physical_sector_size)
 {
        struct request_queue *rq;
        struct blkfront_info *info = gd->private_data;
@@ -944,36 +982,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
        }
 
        rq->queuedata = info;
-       queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
-
-       if (info->feature_discard) {
-               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
-               blk_queue_max_discard_sectors(rq, get_capacity(gd));
-               rq->limits.discard_granularity = info->discard_granularity;
-               rq->limits.discard_alignment = info->discard_alignment;
-               if (info->feature_secdiscard)
-                       queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
-       }
-
-       /* Hard sector size and max sectors impersonate the equiv. hardware. */
-       blk_queue_logical_block_size(rq, sector_size);
-       blk_queue_physical_block_size(rq, physical_sector_size);
-       blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
-
-       /* Each segment in a request is up to an aligned page in size. */
-       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-       blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-       /* Ensure a merged request will fit in a single I/O ring slot. */
-       blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
-
-       /* Make sure buffer addresses are sector-aligned. */
-       blk_queue_dma_alignment(rq, 511);
-
-       /* Make sure we don't use bounce buffers. */
-       blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
-
-       gd->queue = rq;
+       info->rq = gd->queue = rq;
+       info->gd = gd;
+       info->sector_size = sector_size;
+       info->physical_sector_size = physical_sector_size;
+       blkif_set_queue_limits(info);
 
        return 0;
 }
@@ -1136,16 +1149,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
        gd->private_data = info;
        set_capacity(gd, capacity);
 
-       if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
-                                info->max_indirect_segments ? :
-                                BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+       if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
                del_gendisk(gd);
                goto release;
        }
 
-       info->rq = gd->queue;
-       info->gd = gd;
-
        xlvbd_flush(info);
 
        if (vdisk_info & VDISK_READONLY)
@@ -1315,7 +1323,7 @@ free_shadow:
                        rinfo->ring_ref[i] = GRANT_INVALID_REF;
                }
        }
-       free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+       free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
        rinfo->ring.sring = NULL;
 
        if (rinfo->irq)
@@ -2007,8 +2015,10 @@ static int blkif_recover(struct blkfront_info *info)
        struct split_bio *split_bio;
 
        blkfront_gather_backend_features(info);
+       /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
+       blkif_set_queue_limits(info);
        segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-       blk_queue_max_segments(info->rq, segs);
+       blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
        for (r_index = 0; r_index < info->nr_rings; r_index++) {
                struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
@@ -2432,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info)
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                                 info->xbdev->otherend);
-               return;
+               goto fail;
        }
 
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
@@ -2445,6 +2455,11 @@ static void blkfront_connect(struct blkfront_info *info)
        device_add_disk(&info->xbdev->dev, info->gd);
 
        info->is_ready = 1;
+       return;
+
+fail:
+       blkif_free(info, 0);
+       return;
 }
 
 /**
index 5755907..ffa7c9d 100644 (file)
@@ -551,7 +551,7 @@ static struct attribute *cci5xx_pmu_event_attrs[] = {
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
-       CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
+       CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
        NULL
 };
index 97a9185..884c030 100644 (file)
@@ -187,6 +187,7 @@ struct arm_ccn {
        struct arm_ccn_component *xp;
 
        struct arm_ccn_dt dt;
+       int mn_id;
 };
 
 static DEFINE_MUTEX(arm_ccn_mutex);
@@ -212,6 +213,7 @@ static int arm_ccn_node_to_xp_port(int node)
 #define CCN_CONFIG_TYPE(_config)       (((_config) >> 8) & 0xff)
 #define CCN_CONFIG_EVENT(_config)      (((_config) >> 16) & 0xff)
 #define CCN_CONFIG_PORT(_config)       (((_config) >> 24) & 0x3)
+#define CCN_CONFIG_BUS(_config)                (((_config) >> 24) & 0x3)
 #define CCN_CONFIG_VC(_config)         (((_config) >> 26) & 0x7)
 #define CCN_CONFIG_DIR(_config)                (((_config) >> 29) & 0x1)
 #define CCN_CONFIG_MASK(_config)       (((_config) >> 30) & 0xf)
@@ -241,6 +243,7 @@ static CCN_FORMAT_ATTR(xp, "config:0-7");
 static CCN_FORMAT_ATTR(type, "config:8-15");
 static CCN_FORMAT_ATTR(event, "config:16-23");
 static CCN_FORMAT_ATTR(port, "config:24-25");
+static CCN_FORMAT_ATTR(bus, "config:24-25");
 static CCN_FORMAT_ATTR(vc, "config:26-28");
 static CCN_FORMAT_ATTR(dir, "config:29-29");
 static CCN_FORMAT_ATTR(mask, "config:30-33");
@@ -253,6 +256,7 @@ static struct attribute *arm_ccn_pmu_format_attrs[] = {
        &arm_ccn_pmu_format_attr_type.attr.attr,
        &arm_ccn_pmu_format_attr_event.attr.attr,
        &arm_ccn_pmu_format_attr_port.attr.attr,
+       &arm_ccn_pmu_format_attr_bus.attr.attr,
        &arm_ccn_pmu_format_attr_vc.attr.attr,
        &arm_ccn_pmu_format_attr_dir.attr.attr,
        &arm_ccn_pmu_format_attr_mask.attr.attr,
@@ -328,6 +332,7 @@ struct arm_ccn_pmu_event {
 static ssize_t arm_ccn_pmu_event_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
+       struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
        struct arm_ccn_pmu_event *event = container_of(attr,
                        struct arm_ccn_pmu_event, attr);
        ssize_t res;
@@ -349,10 +354,17 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
                break;
        case CCN_TYPE_XP:
                res += snprintf(buf + res, PAGE_SIZE - res,
-                               ",xp=?,port=?,vc=?,dir=?");
+                               ",xp=?,vc=?");
                if (event->event == CCN_EVENT_WATCHPOINT)
                        res += snprintf(buf + res, PAGE_SIZE - res,
-                                       ",cmp_l=?,cmp_h=?,mask=?");
+                                       ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
+               else
+                       res += snprintf(buf + res, PAGE_SIZE - res,
+                                       ",bus=?");
+
+               break;
+       case CCN_TYPE_MN:
+               res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
                break;
        default:
                res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
@@ -383,9 +395,9 @@ static umode_t arm_ccn_pmu_events_is_visible(struct kobject *kobj,
 }
 
 static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
-       CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
-       CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
-       CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
+       CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
+       CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
+       CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
        CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
        CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
        CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
@@ -733,9 +745,10 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 
        if (has_branch_stack(event) || event->attr.exclude_user ||
                        event->attr.exclude_kernel || event->attr.exclude_hv ||
-                       event->attr.exclude_idle) {
+                       event->attr.exclude_idle || event->attr.exclude_host ||
+                       event->attr.exclude_guest) {
                dev_warn(ccn->dev, "Can't exclude execution levels!\n");
-               return -EOPNOTSUPP;
+               return -EINVAL;
        }
 
        if (event->cpu < 0) {
@@ -759,6 +772,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event)
 
        /* Validate node/xp vs topology */
        switch (type) {
+       case CCN_TYPE_MN:
+               if (node_xp != ccn->mn_id) {
+                       dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+                       return -EINVAL;
+               }
+               break;
        case CCN_TYPE_XP:
                if (node_xp >= ccn->num_xps) {
                        dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
@@ -886,6 +905,10 @@ static void arm_ccn_pmu_xp_dt_config(struct perf_event *event, int enable)
        struct arm_ccn_component *xp;
        u32 val, dt_cfg;
 
+       /* Nothing to do for cycle counter */
+       if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
+               return;
+
        if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
                xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
        else
@@ -917,38 +940,17 @@ static void arm_ccn_pmu_event_start(struct perf_event *event, int flags)
                        arm_ccn_pmu_read_counter(ccn, hw->idx));
        hw->state = 0;
 
-       /*
-        * Pin the timer, so that the overflows are handled by the chosen
-        * event->cpu (this is the same one as presented in "cpumask"
-        * attribute).
-        */
-       if (!ccn->irq)
-               hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
-                               HRTIMER_MODE_REL_PINNED);
-
        /* Set the DT bus input, engaging the counter */
        arm_ccn_pmu_xp_dt_config(event, 1);
 }
 
 static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
 {
-       struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
        struct hw_perf_event *hw = &event->hw;
-       u64 timeout;
 
        /* Disable counting, setting the DT bus to pass-through mode */
        arm_ccn_pmu_xp_dt_config(event, 0);
 
-       if (!ccn->irq)
-               hrtimer_cancel(&ccn->dt.hrtimer);
-
-       /* Let the DT bus drain */
-       timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) +
-                       ccn->num_xps;
-       while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) <
-                       timeout)
-               cpu_relax();
-
        if (flags & PERF_EF_UPDATE)
                arm_ccn_pmu_event_update(event);
 
@@ -988,7 +990,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event)
 
        /* Comparison values */
        writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
-       writel((cmp_l >> 32) & 0xefffffff,
+       writel((cmp_l >> 32) & 0x7fffffff,
                        source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
        writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
        writel((cmp_h >> 32) & 0x0fffffff,
@@ -996,7 +998,7 @@ static void arm_ccn_pmu_xp_watchpoint_config(struct perf_event *event)
 
        /* Mask */
        writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
-       writel((mask_l >> 32) & 0xefffffff,
+       writel((mask_l >> 32) & 0x7fffffff,
                        source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
        writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
        writel((mask_h >> 32) & 0x0fffffff,
@@ -1014,7 +1016,7 @@ static void arm_ccn_pmu_xp_event_config(struct perf_event *event)
        hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base);
 
        id = (CCN_CONFIG_VC(event->attr.config) << 4) |
-                       (CCN_CONFIG_PORT(event->attr.config) << 3) |
+                       (CCN_CONFIG_BUS(event->attr.config) << 3) |
                        (CCN_CONFIG_EVENT(event->attr.config) << 0);
 
        val = readl(source->base + CCN_XP_PMU_EVENT_SEL);
@@ -1099,15 +1101,31 @@ static void arm_ccn_pmu_event_config(struct perf_event *event)
        spin_unlock(&ccn->dt.config_lock);
 }
 
+static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn)
+{
+       return bitmap_weight(ccn->dt.pmu_counters_mask,
+                            CCN_NUM_PMU_EVENT_COUNTERS + 1);
+}
+
 static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 {
        int err;
        struct hw_perf_event *hw = &event->hw;
+       struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 
        err = arm_ccn_pmu_event_alloc(event);
        if (err)
                return err;
 
+       /*
+        * Pin the timer, so that the overflows are handled by the chosen
+        * event->cpu (this is the same one as presented in "cpumask"
+        * attribute).
+        */
+       if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1)
+               hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
+                             HRTIMER_MODE_REL_PINNED);
+
        arm_ccn_pmu_event_config(event);
 
        hw->state = PERF_HES_STOPPED;
@@ -1120,9 +1138,14 @@ static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 
 static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
 {
+       struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+
        arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
 
        arm_ccn_pmu_event_release(event);
+
+       if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0)
+               hrtimer_cancel(&ccn->dt.hrtimer);
 }
 
 static void arm_ccn_pmu_event_read(struct perf_event *event)
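
Moving the hrtimer start and cancel out of event start/stop and into add/del ties the polling timer to the population of active counters: arm it when the first counter is installed, cancel it when the last one is removed. The first-user/last-user shape, sketched with illustrative helpers:

    extern void poll_timer_start(void);     /* illustrative stand-ins */
    extern void poll_timer_cancel(void);    /* for the pinned hrtimer */

    static int nr_active;

    static void counter_add(void)
    {
            if (nr_active++ == 0)
                    poll_timer_start();     /* first counter: arm timer */
    }

    static void counter_del(void)
    {
            if (--nr_active == 0)
                    poll_timer_cancel();    /* last one gone: stop it */
    }
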
@@ -1130,6 +1153,24 @@ static void arm_ccn_pmu_event_read(struct perf_event *event)
        arm_ccn_pmu_event_update(event);
 }
 
+static void arm_ccn_pmu_enable(struct pmu *pmu)
+{
+       struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+       u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+       val |= CCN_DT_PMCR__PMU_EN;
+       writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
+static void arm_ccn_pmu_disable(struct pmu *pmu)
+{
+       struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+       u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+       val &= ~CCN_DT_PMCR__PMU_EN;
+       writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
 static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt)
 {
        u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR);
@@ -1252,6 +1293,8 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
                .start = arm_ccn_pmu_event_start,
                .stop = arm_ccn_pmu_event_stop,
                .read = arm_ccn_pmu_event_read,
+               .pmu_enable = arm_ccn_pmu_enable,
+               .pmu_disable = arm_ccn_pmu_disable,
        };
 
        /* No overflow interrupt? Have to use a timer instead. */
@@ -1361,6 +1404,8 @@ static int arm_ccn_init_nodes(struct arm_ccn *ccn, int region,
 
        switch (type) {
        case CCN_TYPE_MN:
+               ccn->mn_id = id;
+               return 0;
        case CCN_TYPE_DT:
                return 0;
        case CCN_TYPE_XP:
@@ -1471,8 +1516,9 @@ static int arm_ccn_probe(struct platform_device *pdev)
                /* Can set 'disable' bits, so can acknowledge interrupts */
                writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
                                ccn->base + CCN_MN_ERRINT_STATUS);
-               err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0,
-                               dev_name(ccn->dev), ccn);
+               err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler,
+                                      IRQF_NOBALANCING | IRQF_NO_THREAD,
+                                      dev_name(ccn->dev), ccn);
                if (err)
                        return err;
 
index c3cb76b..9efdf1d 100644 (file)
@@ -178,6 +178,7 @@ static int vexpress_config_populate(struct device_node *node)
 
        parent = class_find_device(vexpress_config_class, NULL, bridge,
                        vexpress_config_node_match);
+       of_node_put(bridge);
        if (WARN_ON(!parent))
                return -ENODEV;
 
index 56ad5a5..8c0770b 100644 (file)
@@ -244,7 +244,7 @@ config HW_RANDOM_TX4939
 
 config HW_RANDOM_MXC_RNGA
        tristate "Freescale i.MX RNGA Random Number Generator"
-       depends on ARCH_HAS_RNGA
+       depends on SOC_IMX31
        default HW_RANDOM
        ---help---
          This driver provides kernel-side support for the Random Number
index 08c7e23..0c75c3f 100644 (file)
@@ -957,7 +957,7 @@ int tpm2_auto_startup(struct tpm_chip *chip)
                goto out;
 
        rc = tpm2_do_selftest(chip);
-       if (rc != TPM2_RC_INITIALIZE) {
+       if (rc != 0 && rc != TPM2_RC_INITIALIZE) {
                dev_err(&chip->dev, "TPM self test failed\n");
                goto out;
        }
@@ -974,7 +974,6 @@ int tpm2_auto_startup(struct tpm_chip *chip)
                }
        }
 
-       return rc;
 out:
        if (rc > 0)
                rc = -ENODEV;
index d2406fe..5da47e2 100644 (file)
@@ -165,6 +165,12 @@ struct ports_device {
         */
        struct virtqueue *c_ivq, *c_ovq;
 
+       /*
+        * A control packet buffer for guest->host requests, protected
+        * by c_ovq_lock.
+        */
+       struct virtio_console_control cpkt;
+
        /* Array of per-port IO virtqueues */
        struct virtqueue **in_vqs, **out_vqs;
 
@@ -560,28 +566,29 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
                                  unsigned int event, unsigned int value)
 {
        struct scatterlist sg[1];
-       struct virtio_console_control cpkt;
        struct virtqueue *vq;
        unsigned int len;
 
        if (!use_multiport(portdev))
                return 0;
 
-       cpkt.id = cpu_to_virtio32(portdev->vdev, port_id);
-       cpkt.event = cpu_to_virtio16(portdev->vdev, event);
-       cpkt.value = cpu_to_virtio16(portdev->vdev, value);
-
        vq = portdev->c_ovq;
 
-       sg_init_one(sg, &cpkt, sizeof(cpkt));
-
        spin_lock(&portdev->c_ovq_lock);
-       if (virtqueue_add_outbuf(vq, sg, 1, &cpkt, GFP_ATOMIC) == 0) {
+
+       portdev->cpkt.id = cpu_to_virtio32(portdev->vdev, port_id);
+       portdev->cpkt.event = cpu_to_virtio16(portdev->vdev, event);
+       portdev->cpkt.value = cpu_to_virtio16(portdev->vdev, value);
+
+       sg_init_one(sg, &portdev->cpkt, sizeof(struct virtio_console_control));
+
+       if (virtqueue_add_outbuf(vq, sg, 1, &portdev->cpkt, GFP_ATOMIC) == 0) {
                virtqueue_kick(vq);
                while (!virtqueue_get_buf(vq, &len)
                        && !virtqueue_is_broken(vq))
                        cpu_relax();
        }
+
        spin_unlock(&portdev->c_ovq_lock);
        return 0;
 }
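
virtqueue_add_outbuf() only queues the buffer for the host to read later, so a control packet on the caller's stack may be reused before the host ever sees it. The hunk therefore moves the packet into the long-lived ports_device, serialized by the lock that already guards the control virtqueue. The lifetime rule in miniature, with illustrative types:

    #include <pthread.h>
    #include <stddef.h>
    #include <stdint.h>

    struct ctrl_pkt { uint32_t id; uint16_t event, value; };

    struct dev {
            pthread_mutex_t ovq_lock;       /* serializes control sends */
            struct ctrl_pkt cpkt;           /* lives as long as the dev */
    };

    extern void queue_async_send(struct dev *d, const void *buf, size_t len);

    /* Fill the device-lifetime packet under the lock and hand *that*
     * to the asynchronous queue, never a local whose stack frame may
     * be reused before the consumer reads it. */
    static void send_ctrl(struct dev *d, uint32_t id,
                          uint16_t event, uint16_t value)
    {
            pthread_mutex_lock(&d->ovq_lock);
            d->cpkt = (struct ctrl_pkt){ id, event, value };
            queue_async_send(d, &d->cpkt, sizeof(d->cpkt));
            pthread_mutex_unlock(&d->ovq_lock);
    }
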
index d359c92..e38bf60 100644 (file)
@@ -69,6 +69,7 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
        DEF_FIXED(".s1",        CLK_S1,            CLK_PLL1_DIV2,  3, 1),
        DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
        DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
+       DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
 
        /* Core Clock Outputs */
        DEF_FIXED("ztr",        R8A7795_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
@@ -87,10 +88,10 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
        DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
        DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
 
-       DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_PLL1_DIV2, 0x0074),
-       DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_PLL1_DIV2, 0x0078),
-       DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_PLL1_DIV2, 0x0268),
-       DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_PLL1_DIV2, 0x026c),
+       DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x0074),
+       DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x0078),
+       DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x0268),
+       DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x026c),
 
        DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
        DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
index c109d80..cdfabeb 100644 (file)
@@ -833,9 +833,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 
        /* perihp */
        GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
-                       RK3399_CLKGATE_CON(5), 0, GFLAGS),
-       GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
                        RK3399_CLKGATE_CON(5), 1, GFLAGS),
+       GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
+                       RK3399_CLKGATE_CON(5), 0, GFLAGS),
        COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED,
                        RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS,
                        RK3399_CLKGATE_CON(5), 2, GFLAGS),
@@ -923,9 +923,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
                        RK3399_CLKGATE_CON(6), 14, GFLAGS),
 
        GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED,
-                       RK3399_CLKGATE_CON(6), 12, GFLAGS),
-       GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED,
                        RK3399_CLKGATE_CON(6), 13, GFLAGS),
+       GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED,
+                       RK3399_CLKGATE_CON(6), 12, GFLAGS),
        COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED,
                        RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS),
        GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED,
@@ -1071,7 +1071,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
        /* vio */
        COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED,
                        RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS,
-                       RK3399_CLKGATE_CON(11), 10, GFLAGS),
+                       RK3399_CLKGATE_CON(11), 0, GFLAGS),
        COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", 0,
                        RK3399_CLKSEL_CON(43), 0, 5, DFLAGS,
                        RK3399_CLKGATE_CON(11), 1, GFLAGS),
@@ -1484,6 +1484,7 @@ static const char *const rk3399_cru_critical_clocks[] __initconst = {
        "hclk_perilp1",
        "hclk_perilp1_noc",
        "aclk_dmac0_perilp",
+       "aclk_emmc_noc",
        "gpll_hclk_perilp1_src",
        "gpll_aclk_perilp0_src",
        "gpll_aclk_perihp_src",
index fc17b52..51d4bac 100644 (file)
@@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock)
                return;
 
        WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg,
-                                          !(reg & lock), 100, 70000));
+                                          reg & lock, 100, 70000));
 }
 
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
index 64da7b7..933b5dd 100644 (file)
@@ -428,7 +428,7 @@ static struct tegra_clk_pll_params pll_d_params = {
        .div_nmp = &pllp_nmp,
        .freq_table = pll_d_freq_table,
        .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON |
-                TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+                TEGRA_PLL_HAS_LOCK_ENABLE,
 };
 
 static struct tegra_clk_pll_params pll_d2_params = {
@@ -446,7 +446,7 @@ static struct tegra_clk_pll_params pll_d2_params = {
        .div_nmp = &pllp_nmp,
        .freq_table = pll_d_freq_table,
        .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON |
-                TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+                TEGRA_PLL_HAS_LOCK_ENABLE,
 };
 
 static const struct pdiv_map pllu_p[] = {
index 7e3fd37..92f6e4d 100644 (file)
@@ -66,10 +66,10 @@ static void kona_timer_disable_and_clear(void __iomem *base)
 
 }
 
-static void
+static int
 kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
 {
-       int loop_limit = 4;
+       int loop_limit = 3;
 
        /*
         * Read 64-bit free running counter
@@ -83,18 +83,19 @@ kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
         *      if new hi-word is equal to previously read hi-word then stop.
         */
 
-       while (--loop_limit) {
+       do {
                *msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
                *lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
                if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
                        break;
-       }
+       } while (--loop_limit);
        if (!loop_limit) {
                pr_err("bcm_kona_timer: getting counter failed.\n");
                pr_err(" Timer will be impacted\n");
+               return -ETIMEDOUT;
        }
 
-       return;
+       return 0;
 }
 
 static int kona_timer_set_next_event(unsigned long clc,
@@ -112,8 +113,11 @@ static int kona_timer_set_next_event(unsigned long clc,
 
        uint32_t lsw, msw;
        uint32_t reg;
+       int ret;
 
-       kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       if (ret)
+               return ret;
 
        /* Load the "next" event tick value */
        writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
index d91e872..b4b3ab5 100644 (file)
@@ -164,7 +164,7 @@ void __init gic_clocksource_init(unsigned int frequency)
        gic_start_count();
 }
 
-static void __init gic_clocksource_of_init(struct device_node *node)
+static int __init gic_clocksource_of_init(struct device_node *node)
 {
        struct clk *clk;
        int ret;
index 937e10b..3e1cb51 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/of_irq.h>
 #include <linux/sched_clock.h>
 
+#include <clocksource/pxa.h>
+
 #include <asm/div64.h>
 
 #define OSMR0          0x00    /* OS Timer 0 Match Register */
index 97669ee..c83452c 100644 (file)
@@ -123,12 +123,16 @@ static struct clock_event_device sun4i_clockevent = {
        .set_next_event = sun4i_clkevt_next_event,
 };
 
+static void sun4i_timer_clear_interrupt(void)
+{
+       writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG);
+}
 
 static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
 {
        struct clock_event_device *evt = (struct clock_event_device *)dev_id;
 
-       writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+       sun4i_timer_clear_interrupt();
        evt->event_handler(evt);
 
        return IRQ_HANDLED;
@@ -208,6 +212,9 @@ static int __init sun4i_timer_init(struct device_node *node)
        /* Make sure timer is stopped before playing with interrupts */
        sun4i_clkevt_time_stop(0);
 
+       /* clear timer0 interrupt */
+       sun4i_timer_clear_interrupt();
+
        sun4i_clockevent.cpumask = cpu_possible_mask;
        sun4i_clockevent.irq = irq;
 
index 719b478..3c39e6f 100644 (file)
@@ -338,7 +338,6 @@ static int __init armada_xp_timer_init(struct device_node *np)
        struct clk *clk = of_clk_get_by_name(np, "fixed");
        int ret;
 
-       clk = of_clk_get(np, 0);
        if (IS_ERR(clk)) {
                pr_err("Failed to get clock");
                return PTR_ERR(clk);
index a7d9a08..a8e6c7d 100644 (file)
@@ -202,10 +202,10 @@ static int __init pistachio_clksrc_of_init(struct device_node *node)
        rate = clk_get_rate(fast_clk);
 
        /* Disable irq's for clocksource usage */
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
-       gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
+       gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
 
        /* Enable timer block */
        writel(TIMER_ME_GLOBAL, pcs_gpt.base);
index 1ffac0c..7f0f5b2 100644
@@ -240,6 +240,7 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data)
 static int __init at91sam926x_pit_dt_init(struct device_node *node)
 {
        struct pit_data *data;
+       int ret;
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
@@ -261,6 +262,12 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node)
                return PTR_ERR(data->mck);
        }
 
+       ret = clk_prepare_enable(data->mck);
+       if (ret) {
+               pr_err("Unable to enable mck\n");
+               return ret;
+       }
+
        /* Get the interrupts property */
        data->irq = irq_of_parse_and_map(node, 0);
        if (!data->irq) {
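
The added clk_prepare_enable() call is the standard clk consumer pattern: it combines clk_prepare() and clk_enable() and returns an error that must be checked. A sketch of the full lifecycle (function name illustrative):

	static int example_clk_init(struct device_node *node)
	{
		struct clk *clk;
		int ret;

		clk = of_clk_get(node, 0);
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		ret = clk_prepare_enable(clk);
		if (ret) {
			clk_put(clk);
			return ret;
		}

		/* ... hardware may now be clocked; on teardown: */
		clk_disable_unprepare(clk);
		clk_put(clk);
		return 0;
	}
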
index 0bb44d5..2ee40fd 100644
@@ -74,6 +74,8 @@ static const struct of_device_id machines[] __initconst = {
        { .compatible = "ti,omap5", },
 
        { .compatible = "xlnx,zynq-7000", },
+
+       { }
 };
 
 static int __init cpufreq_dt_platdev_init(void)
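
The "{ }" entry added above is the required empty sentinel: of_device_id tables are scanned until an all-zero entry is reached, so a table without one walks off the end of the array. Shape of a correctly terminated table (entries hypothetical):

	static const struct of_device_id example_matches[] __initconst = {
		{ .compatible = "vendor,soc-a", },
		{ .compatible = "vendor,soc-b", },
		{ /* sentinel */ }
	};
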
index 1af94e2..9b035b7 100644
@@ -550,4 +550,6 @@ config CRYPTO_DEV_ROCKCHIP
          This driver interfaces with the hardware crypto accelerator.
          Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
 
+source "drivers/crypto/chelsio/Kconfig"
+
 endif # CRYPTO_HW
index 3c6432d..ad7250f 100644
@@ -31,3 +31,4 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
index 6dc5971..b304421 100644
@@ -556,7 +556,10 @@ skip_enc:
 
        /* Read and write assoclen bytes */
        append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+       if (alg->caam.geniv)
+               append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
+       else
+               append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
        /* Skip assoc data */
        append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
@@ -565,6 +568,14 @@ skip_enc:
        append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
                             KEY_VLF);
 
+       if (alg->caam.geniv) {
+               append_seq_load(desc, ivsize, LDST_CLASS_1_CCB |
+                               LDST_SRCDST_BYTE_CONTEXT |
+                               (ctx1_iv_off << LDST_OFFSET_SHIFT));
+               append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO |
+                           (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize);
+       }
+
        /* Load Counter into CONTEXT1 reg */
        if (is_rfc3686)
                append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
@@ -2150,7 +2161,7 @@ static void init_authenc_job(struct aead_request *req,
 
        init_aead_job(req, edesc, all_contig, encrypt);
 
-       if (ivsize && (is_rfc3686 || !(alg->caam.geniv && encrypt)))
+       if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv))
                append_load_as_imm(desc, req->iv, ivsize,
                                   LDST_CLASS_1_CCB |
                                   LDST_SRCDST_BYTE_CONTEXT |
@@ -2537,20 +2548,6 @@ static int aead_decrypt(struct aead_request *req)
        return ret;
 }
 
-static int aead_givdecrypt(struct aead_request *req)
-{
-       struct crypto_aead *aead = crypto_aead_reqtfm(req);
-       unsigned int ivsize = crypto_aead_ivsize(aead);
-
-       if (req->cryptlen < ivsize)
-               return -EINVAL;
-
-       req->cryptlen -= ivsize;
-       req->assoclen += ivsize;
-
-       return aead_decrypt(req);
-}
-
 /*
  * allocate and map the ablkcipher extended descriptor for ablkcipher
  */
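
For reference, all the retired helper did was re-account the leading, generated IV as associated data before delegating; with the descriptor changes above (the geniv branch that SEQ-loads the IV into context), plain aead_decrypt() now covers both cases. The retired accounting, as a self-contained sketch:

	static int legacy_givdecrypt_fixup(struct aead_request *req,
					   unsigned int ivsize)
	{
		if (req->cryptlen < ivsize)
			return -EINVAL;
		/* treat the IV that precedes the payload as extra AAD */
		req->cryptlen -= ivsize;
		req->assoclen += ivsize;
		return 0;	/* caller then runs the normal decrypt */
	}
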
@@ -3210,7 +3207,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
                },
@@ -3256,7 +3253,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
                },
@@ -3302,7 +3299,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
                },
@@ -3348,7 +3345,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
                },
@@ -3394,7 +3391,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
                },
@@ -3440,7 +3437,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = AES_BLOCK_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
                },
@@ -3486,7 +3483,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
                },
@@ -3534,7 +3531,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
                },
@@ -3582,7 +3579,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
                },
@@ -3630,7 +3627,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
                },
@@ -3678,7 +3675,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
                },
@@ -3726,7 +3723,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES3_EDE_BLOCK_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
                },
@@ -3772,7 +3769,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
                },
@@ -3818,7 +3815,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
                },
@@ -3864,7 +3861,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
                },
@@ -3910,7 +3907,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
                },
@@ -3956,7 +3953,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
                },
@@ -4002,7 +3999,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = DES_BLOCK_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
                },
@@ -4051,7 +4048,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = MD5_DIGEST_SIZE,
                },
@@ -4102,7 +4099,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = SHA1_DIGEST_SIZE,
                },
@@ -4153,7 +4150,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = SHA224_DIGEST_SIZE,
                },
@@ -4204,7 +4201,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = SHA256_DIGEST_SIZE,
                },
@@ -4255,7 +4252,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = SHA384_DIGEST_SIZE,
                },
@@ -4306,7 +4303,7 @@ static struct caam_aead_alg driver_aeads[] = {
                        .setkey = aead_setkey,
                        .setauthsize = aead_setauthsize,
                        .encrypt = aead_encrypt,
-                       .decrypt = aead_givdecrypt,
+                       .decrypt = aead_decrypt,
                        .ivsize = CTR_RFC3686_IV_SIZE,
                        .maxauthsize = SHA512_DIGEST_SIZE,
                },
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
new file mode 100644
index 0000000..4ce67fb
--- /dev/null
@@ -0,0 +1,19 @@
+config CRYPTO_DEV_CHELSIO
+       tristate "Chelsio Crypto Co-processor Driver"
+       depends on CHELSIO_T4
+       select CRYPTO_SHA1
+       select CRYPTO_SHA256
+       select CRYPTO_SHA512
+       ---help---
+         The Chelsio Crypto Co-processor driver for T6 adapters.
+
+         For general information about Chelsio and our products, visit
+         our website at <http://www.chelsio.com>.
+
+         For customer support, please visit our customer support page at
+         <http://www.chelsio.com/support.html>.
+
+         Please send feedback to <linux-bugs@chelsio.com>.
+
+         To compile this driver as a module, choose M here: the module
+         will be called chcr.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
new file mode 100644
index 0000000..bebdf06
--- /dev/null
@@ -0,0 +1,4 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
+chcr-objs :=  chcr_core.o chcr_algo.o
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
new file mode 100644
index 0000000..e4ddb92
--- /dev/null
@@ -0,0 +1,1525 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Written and Maintained by:
+ *     Manoj Malviya (manojmalviya@chelsio.com)
+ *     Atul Gupta (atul.gupta@chelsio.com)
+ *     Jitendra Lulla (jlulla@chelsio.com)
+ *     Yeshaswi M R Gowda (yeshaswi@chelsio.com)
+ *     Harsh Jain (harsh@chelsio.com)
+ */
+
+#define pr_fmt(fmt) "chcr:" fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/internal/hash.h>
+
+#include "t4fw_api.h"
+#include "t4_msg.h"
+#include "chcr_core.h"
+#include "chcr_algo.h"
+#include "chcr_crypto.h"
+
+static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx)
+{
+       return ctx->crypto_ctx->ablkctx;
+}
+
+static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx)
+{
+       return ctx->crypto_ctx->hmacctx;
+}
+
+static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx)
+{
+       return ctx->dev->u_ctx;
+}
+
+static inline int is_ofld_imm(const struct sk_buff *skb)
+{
+       return (skb->len <= CRYPTO_MAX_IMM_TX_PKT_LEN);
+}
+
+/*
+ *     sgl_len - calculates the size of an SGL of the given capacity
+ *     @n: the number of SGL entries
+ *     Calculates the number of flits needed for a scatter/gather list that
+ *     can hold the given number of entries.
+ */
+static inline unsigned int sgl_len(unsigned int n)
+{
+       n--;
+       return (3 * n) / 2 + (n & 1) + 2;
+}
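
A quick worked check of the flit arithmetic above, where one flit is 8 bytes (matching the DIV_ROUND_UP(skb->len, 8) conversion used elsewhere in this file); values are illustrative:

	/*
	 * After the leading n--, each remaining entry costs 1.5 flits and
	 * the SGL header costs 2:
	 *
	 *   sgl_len(1) = 3*0/2 + 0 + 2 =  2 flits (16 bytes)
	 *   sgl_len(8) = 3*7/2 + 1 + 2 = 13 flits (104 bytes)
	 */
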
+
+/*
+ *     chcr_handle_resp - Unmap the DMA buffers associated with the request
+ *     @req: crypto request
+ */
+int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
+                    int error_status)
+{
+       struct crypto_tfm *tfm = req->tfm;
+       struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct chcr_req_ctx ctx_req;
+       struct cpl_fw6_pld *fw6_pld;
+       unsigned int digestsize, updated_digestsize;
+
+       switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+       case CRYPTO_ALG_TYPE_BLKCIPHER:
+               ctx_req.req.ablk_req = (struct ablkcipher_request *)req;
+               ctx_req.ctx.ablk_ctx =
+                       ablkcipher_request_ctx(ctx_req.req.ablk_req);
+               if (!error_status) {
+                       fw6_pld = (struct cpl_fw6_pld *)input;
+                       memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2],
+                              AES_BLOCK_SIZE);
+               }
+               dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst,
+                            ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE);
+               if (ctx_req.ctx.ablk_ctx->skb) {
+                       kfree_skb(ctx_req.ctx.ablk_ctx->skb);
+                       ctx_req.ctx.ablk_ctx->skb = NULL;
+               }
+               break;
+
+       case CRYPTO_ALG_TYPE_AHASH:
+               ctx_req.req.ahash_req = (struct ahash_request *)req;
+               ctx_req.ctx.ahash_ctx =
+                       ahash_request_ctx(ctx_req.req.ahash_req);
+               digestsize =
+                       crypto_ahash_digestsize(crypto_ahash_reqtfm(
+                                                       ctx_req.req.ahash_req));
+               updated_digestsize = digestsize;
+               if (digestsize == SHA224_DIGEST_SIZE)
+                       updated_digestsize = SHA256_DIGEST_SIZE;
+               else if (digestsize == SHA384_DIGEST_SIZE)
+                       updated_digestsize = SHA512_DIGEST_SIZE;
+               if (ctx_req.ctx.ahash_ctx->skb)
+                       ctx_req.ctx.ahash_ctx->skb = NULL;
+               if (ctx_req.ctx.ahash_ctx->result == 1) {
+                       ctx_req.ctx.ahash_ctx->result = 0;
+                       memcpy(ctx_req.req.ahash_req->result, input +
+                              sizeof(struct cpl_fw6_pld),
+                              digestsize);
+               } else {
+                       memcpy(ctx_req.ctx.ahash_ctx->partial_hash, input +
+                              sizeof(struct cpl_fw6_pld),
+                              updated_digestsize);
+               }
+               kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr);
+               ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL;
+               break;
+       }
+       return 0;
+}
+
+/*
+ *     calc_tx_flits_ofld - calculate # of flits for an offload packet
+ *     @skb: the packet
+ *     Returns the number of flits needed for the given offload packet.
+ *     These packets are already fully constructed and no additional headers
+ *     will be added.
+ */
+static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
+{
+       unsigned int flits, cnt;
+
+       if (is_ofld_imm(skb))
+               return DIV_ROUND_UP(skb->len, 8);
+
+       flits = skb_transport_offset(skb) / 8;   /* headers */
+       cnt = skb_shinfo(skb)->nr_frags;
+       if (skb_tail_pointer(skb) != skb_transport_header(skb))
+               cnt++;
+       return flits + sgl_len(cnt);
+}
+
+static struct shash_desc *chcr_alloc_shash(unsigned int ds)
+{
+       struct crypto_shash *base_hash = NULL;
+       struct shash_desc *desc;
+
+       switch (ds) {
+       case SHA1_DIGEST_SIZE:
+               base_hash = crypto_alloc_shash("sha1-generic", 0, 0);
+               break;
+       case SHA224_DIGEST_SIZE:
+               base_hash = crypto_alloc_shash("sha224-generic", 0, 0);
+               break;
+       case SHA256_DIGEST_SIZE:
+               base_hash = crypto_alloc_shash("sha256-generic", 0, 0);
+               break;
+       case SHA384_DIGEST_SIZE:
+               base_hash = crypto_alloc_shash("sha384-generic", 0, 0);
+               break;
+       case SHA512_DIGEST_SIZE:
+               base_hash = crypto_alloc_shash("sha512-generic", 0, 0);
+               break;
+       }
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+       if (IS_ERR(base_hash)) {
+               pr_err("Cannot allocate sha-generic algo.\n");
+               return ERR_CAST(base_hash);
+       }
+       desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash),
+                      GFP_KERNEL);
+       if (!desc)
+               return ERR_PTR(-ENOMEM);
+       desc->tfm = base_hash;
+       desc->flags = crypto_shash_get_flags(base_hash);
+       return desc;
+}
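
A hedged usage sketch for the helper above: a one-shot software digest with the returned descriptor. The caller owns both the descriptor and its tfm and must release them (helper name and digest size are illustrative):

	static int example_sha256_digest(const u8 *data, unsigned int len,
					 u8 *out)
	{
		struct shash_desc *desc = chcr_alloc_shash(SHA256_DIGEST_SIZE);
		int err;

		if (IS_ERR(desc))
			return PTR_ERR(desc);
		err = crypto_shash_digest(desc, data, len, out);
		crypto_free_shash(desc->tfm);
		kfree(desc);
		return err;
	}
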
+
+static int chcr_compute_partial_hash(struct shash_desc *desc,
+                                    char *iopad, char *result_hash,
+                                    int digest_size)
+{
+       struct sha1_state sha1_st;
+       struct sha256_state sha256_st;
+       struct sha512_state sha512_st;
+       int error;
+
+       if (digest_size == SHA1_DIGEST_SIZE) {
+               error = crypto_shash_init(desc) ?:
+                       crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?:
+                       crypto_shash_export(desc, (void *)&sha1_st);
+               memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE);
+       } else if (digest_size == SHA224_DIGEST_SIZE) {
+               error = crypto_shash_init(desc) ?:
+                       crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
+                       crypto_shash_export(desc, (void *)&sha256_st);
+               memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
+
+       } else if (digest_size == SHA256_DIGEST_SIZE) {
+               error = crypto_shash_init(desc) ?:
+                       crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
+                       crypto_shash_export(desc, (void *)&sha256_st);
+               memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
+
+       } else if (digest_size == SHA384_DIGEST_SIZE) {
+               error = crypto_shash_init(desc) ?:
+                       crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
+                       crypto_shash_export(desc, (void *)&sha512_st);
+               memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
+
+       } else if (digest_size == SHA512_DIGEST_SIZE) {
+               error = crypto_shash_init(desc) ?:
+                       crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
+                       crypto_shash_export(desc, (void *)&sha512_st);
+               memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
+       } else {
+               error = -EINVAL;
+               pr_err("Unknown digest size %d\n", digest_size);
+       }
+       return error;
+}
+
+static void chcr_change_order(char *buf, int ds)
+{
+       int i;
+
+       if (ds == SHA512_DIGEST_SIZE) {
+               for (i = 0; i < (ds / sizeof(u64)); i++)
+                       *((__be64 *)buf + i) =
+                               cpu_to_be64(*((u64 *)buf + i));
+       } else {
+               for (i = 0; i < (ds / sizeof(u32)); i++)
+                       *((__be32 *)buf + i) =
+                               cpu_to_be32(*((u32 *)buf + i));
+       }
+}
+
+static inline int is_hmac(struct crypto_tfm *tfm)
+{
+       struct crypto_alg *alg = tfm->__crt_alg;
+       struct chcr_alg_template *chcr_crypto_alg =
+               container_of(__crypto_ahash_alg(alg), struct chcr_alg_template,
+                            alg.hash);
+       if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) ==
+           CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
+               return 1;
+       return 0;
+}
+
+static inline unsigned int ch_nents(struct scatterlist *sg,
+                                   unsigned int *total_size)
+{
+       unsigned int nents;
+
+       for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) {
+               nents++;
+               *total_size += sg->length;
+       }
+       return nents;
+}
+
+static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
+                          struct scatterlist *sg,
+                          struct phys_sge_parm *sg_param)
+{
+       struct phys_sge_pairs *to;
+       unsigned int out_buf_size = sg_param->obsize;
+       unsigned int nents = sg_param->nents, i, j, tot_len = 0;
+
+       phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL)
+                                   | CPL_RX_PHYS_DSGL_ISRDMA_V(0));
+       phys_cpl->pcirlxorder_to_noofsgentr =
+               htonl(CPL_RX_PHYS_DSGL_PCIRLXORDER_V(0) |
+                     CPL_RX_PHYS_DSGL_PCINOSNOOP_V(0) |
+                     CPL_RX_PHYS_DSGL_PCITPHNTENB_V(0) |
+                     CPL_RX_PHYS_DSGL_PCITPHNT_V(0) |
+                     CPL_RX_PHYS_DSGL_DCAID_V(0) |
+                     CPL_RX_PHYS_DSGL_NOOFSGENTR_V(nents));
+       phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR;
+       phys_cpl->rss_hdr_int.qid = htons(sg_param->qid);
+       phys_cpl->rss_hdr_int.hash_val = 0;
+       to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl +
+                                      sizeof(struct cpl_rx_phys_dsgl));
+
+       for (i = 0; nents; to++) {
+               for (j = i; (nents && (j < (8 + i))); j++, nents--) {
+                       to->len[j] = htons(sg->length);
+                       to->addr[j] = cpu_to_be64(sg_dma_address(sg));
+                       if (out_buf_size) {
+                               if (tot_len + sg_dma_len(sg) >= out_buf_size) {
+                                       to->len[j] = htons(out_buf_size -
+                                                          tot_len);
+                                       return;
+                               }
+                               tot_len += sg_dma_len(sg);
+                       }
+                       sg = sg_next(sg);
+               }
+       }
+}
+
+static inline int
+map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl,
+                    struct scatterlist *sg, struct phys_sge_parm *sg_param)
+{
+       if (!sg || !sg_param->nents)
+               return 0;
+
+       sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE);
+       if (sg_param->nents == 0) {
+               pr_err("CHCR : DMA mapping failed\n");
+               return -EINVAL;
+       }
+       write_phys_cpl(phys_cpl, sg, sg_param);
+       return 0;
+}
+
+static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
+{
+       struct crypto_alg *alg = tfm->__crt_alg;
+       struct chcr_alg_template *chcr_crypto_alg =
+               container_of(alg, struct chcr_alg_template, alg.crypto);
+
+       return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
+}
+
+static inline void
+write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags,
+                       struct scatterlist *sg, unsigned int count)
+{
+       struct page *spage;
+       unsigned int page_len;
+
+       skb->len += count;
+       skb->data_len += count;
+       skb->truesize += count;
+       while (count > 0) {
+               if (sg && (!(sg->length)))
+                       break;
+               spage = sg_page(sg);
+               get_page(spage);
+               page_len = min(sg->length, count);
+               skb_fill_page_desc(skb, *frags, spage, sg->offset, page_len);
+               (*frags)++;
+               count -= page_len;
+               sg = sg_next(sg);
+       }
+}
+
+static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
+                              struct _key_ctx *key_ctx)
+{
+       if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+               get_aes_decrypt_key(key_ctx->key, ablkctx->key,
+                                   ablkctx->enckey_len << 3);
+               memset(key_ctx->key + ablkctx->enckey_len, 0,
+                      CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len);
+       } else {
+               memcpy(key_ctx->key,
+                      ablkctx->key + (ablkctx->enckey_len >> 1),
+                      ablkctx->enckey_len >> 1);
+               get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1),
+                                   ablkctx->key, ablkctx->enckey_len << 2);
+       }
+       return 0;
+}
+
+static inline void create_wreq(struct chcr_context *ctx,
+                              struct fw_crypto_lookaside_wr *wreq,
+                              void *req, struct sk_buff *skb,
+                              int kctx_len, int hash_sz,
+                              unsigned int phys_dsgl)
+{
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1);
+       struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1);
+       int iv_loc = IV_DSGL;
+       int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id];
+       unsigned int immdatalen = 0, nr_frags = 0;
+
+       if (is_ofld_imm(skb)) {
+               immdatalen = skb->data_len;
+               iv_loc = IV_IMMEDIATE;
+       } else {
+               nr_frags = skb_shinfo(skb)->nr_frags;
+       }
+
+       wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen,
+                                                    (kctx_len >> 4));
+       wreq->pld_size_hash_size =
+               htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) |
+                     FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz));
+       wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(
+                                   (calc_tx_flits_ofld(skb) * 8), 16)));
+       wreq->cookie = cpu_to_be64((uintptr_t)req);
+       wreq->rx_chid_to_rx_q_id =
+               FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid,
+                               (hash_sz) ? IV_NOP : iv_loc);
+
+       ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id);
+       ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8),
+                                        16) - ((sizeof(*wreq)) >> 4)));
+
+       sc_imm->cmd_more = FILL_CMD_MORE(immdatalen);
+       sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len +
+                                 ((hash_sz) ? DUMMY_BYTES :
+                                 (sizeof(struct cpl_rx_phys_dsgl) +
+                                  phys_dsgl)) + immdatalen);
+}
+
+/**
+ *     create_cipher_wr - form the WR for cipher operations
+ *     @req: cipher req.
+ *     @ctx: crypto driver context of the request.
+ *     @qid: ingress qid where response of this WR should be received.
+ *     @op_type:       encryption or decryption
+ */
+static struct sk_buff *
+create_cipher_wr(struct crypto_async_request *req_base,
+                 struct chcr_context *ctx, unsigned short qid,
+                 unsigned short op_type)
+{
+       struct ablkcipher_request *req = (struct ablkcipher_request *)req_base;
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+       struct sk_buff *skb = NULL;
+       struct _key_ctx *key_ctx;
+       struct fw_crypto_lookaside_wr *wreq;
+       struct cpl_tx_sec_pdu *sec_cpl;
+       struct cpl_rx_phys_dsgl *phys_cpl;
+       struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req);
+       struct phys_sge_parm sg_param;
+       unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0;
+       unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len;
+
+       if (!req->info)
+               return ERR_PTR(-EINVAL);
+       ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize);
+       ablkctx->enc = op_type;
+
+       if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
+           (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE))
+               return ERR_PTR(-EINVAL);
+
+       phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents);
+
+       kctx_len = sizeof(*key_ctx) +
+               (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+       transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
+                       GFP_ATOMIC);
+       if (!skb)
+               return ERR_PTR(-ENOMEM);
+       skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+       wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
+
+       sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
+       sec_cpl->op_ivinsrtofst =
+               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1);
+
+       sec_cpl->pldlen = htonl(ivsize + req->nbytes);
+       sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0,
+                                                               ivsize + 1, 0);
+
+       sec_cpl->cipherstop_lo_authinsert =  FILL_SEC_CPL_AUTHINSERT(0, 0,
+                                                                    0, 0);
+       sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
+                                                        ablkctx->ciph_mode,
+                                                        0, 0, ivsize >> 1, 1);
+       sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
+                                                         0, 1, phys_dsgl);
+
+       key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
+       key_ctx->ctx_hdr = ablkctx->key_ctx_hdr;
+       if (op_type == CHCR_DECRYPT_OP) {
+               if (generate_copy_rrkey(ablkctx, key_ctx))
+                       goto map_fail1;
+       } else {
+               if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+                       memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len);
+               } else {
+                       memcpy(key_ctx->key, ablkctx->key +
+                              (ablkctx->enckey_len >> 1),
+                              ablkctx->enckey_len >> 1);
+                       memcpy(key_ctx->key +
+                              (ablkctx->enckey_len >> 1),
+                              ablkctx->key,
+                              ablkctx->enckey_len >> 1);
+               }
+       }
+       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len);
+
+       memcpy(ablkctx->iv, req->info, ivsize);
+       sg_init_table(&ablkctx->iv_sg, 1);
+       sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize);
+       sg_param.nents = ablkctx->dst_nents;
+       sg_param.obsize = dst_bufsize;
+       sg_param.qid = qid;
+       sg_param.align = 1;
+       if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst,
+                                &sg_param))
+               goto map_fail1;
+
+       skb_set_transport_header(skb, transhdr_len);
+       write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize);
+       write_sg_data_page_desc(skb, &frags, req->src, req->nbytes);
+       create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl);
+       req_ctx->skb = skb;
+       skb_get(skb);
+       return skb;
+map_fail1:
+       kfree_skb(skb);
+       return ERR_PTR(-ENOMEM);
+}
+
+static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                              unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+       struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+       struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm);
+       unsigned int ck_size, context_size;
+       u16 alignment = 0;
+
+       if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize))
+               goto badkey_err;
+
+       memcpy(ablkctx->key, key, keylen);
+       ablkctx->enckey_len = keylen;
+       if (keylen == AES_KEYSIZE_128) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+       } else if (keylen == AES_KEYSIZE_192) {
+               alignment = 8;
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+       } else if (keylen == AES_KEYSIZE_256) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+       } else {
+               goto badkey_err;
+       }
+
+       context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+                       keylen + alignment) >> 4;
+
+       ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+                                               0, 0, context_size);
+       ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
+       return 0;
+badkey_err:
+       crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+       ablkctx->enckey_len = 0;
+       return -EINVAL;
+}
+
+static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+{
+       int ret = 0;
+       struct sge_ofld_txq *q;
+       struct adapter *adap = netdev2adap(dev);
+
+       local_bh_disable();
+       q = &adap->sge.ofldtxq[idx];
+       spin_lock(&q->sendq.lock);
+       if (q->full)
+               ret = -1;
+       spin_unlock(&q->sendq.lock);
+       local_bh_enable();
+       return ret;
+}
+
+static int chcr_aes_encrypt(struct ablkcipher_request *req)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+       struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+       struct crypto_async_request *req_base = &req->base;
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct sk_buff *skb;
+
+       if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                           ctx->tx_channel_id))) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       skb = create_cipher_wr(req_base, ctx,
+                              u_ctx->lldi.rxq_ids[ctx->tx_channel_id],
+                              CHCR_ENCRYPT_OP);
+       if (IS_ERR(skb)) {
+               pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+               return  PTR_ERR(skb);
+       }
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+       return -EINPROGRESS;
+}
+
+static int chcr_aes_decrypt(struct ablkcipher_request *req)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+       struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+       struct crypto_async_request *req_base = &req->base;
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct sk_buff *skb;
+
+       if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                           ctx->tx_channel_id))) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0],
+                              CHCR_DECRYPT_OP);
+       if (IS_ERR(skb)) {
+               pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+               return PTR_ERR(skb);
+       }
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+       return -EINPROGRESS;
+}
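
Both entry points above return -EINPROGRESS and finish through the request callback once the completion arrives from hardware. How a caller typically consumes such an asynchronous transform, sketched with the generic skcipher API (all names here are illustrative, not part of this driver):

	struct example_wait {
		struct completion done;
		int err;
	};

	static void example_complete(struct crypto_async_request *req, int err)
	{
		struct example_wait *wait = req->data;

		if (err == -EINPROGRESS)
			return;	/* backlogged request started; keep waiting */
		wait->err = err;
		complete(&wait->done);
	}

	static int example_run(struct skcipher_request *req,
			       struct example_wait *wait)
	{
		int err;

		init_completion(&wait->done);
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					      example_complete, wait);
		err = crypto_skcipher_encrypt(req);
		if (err == -EINPROGRESS || err == -EBUSY) {
			wait_for_completion(&wait->done);
			err = wait->err;
		}
		return err;
	}
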
+
+static int chcr_device_init(struct chcr_context *ctx)
+{
+       struct uld_ctx *u_ctx;
+       unsigned int id;
+       int err = 0, rxq_perchan, rxq_idx;
+
+       id = smp_processor_id();
+       if (!ctx->dev) {
+               err = assign_chcr_device(&ctx->dev);
+               if (err) {
+                       pr_err("chcr device assignment fails\n");
+                       goto out;
+               }
+               u_ctx = ULD_CTX(ctx);
+               rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
+               ctx->dev->tx_channel_id = 0;
+               rxq_idx = ctx->dev->tx_channel_id * rxq_perchan;
+               rxq_idx += id % rxq_perchan;
+               spin_lock(&ctx->dev->lock_chcr_dev);
+               ctx->tx_channel_id = rxq_idx;
+               spin_unlock(&ctx->dev->lock_chcr_dev);
+       }
+out:
+       return err;
+}
+
+static int chcr_cra_init(struct crypto_tfm *tfm)
+{
+       tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
+       return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static int get_alg_config(struct algo_param *params,
+                         unsigned int auth_size)
+{
+       switch (auth_size) {
+       case SHA1_DIGEST_SIZE:
+               params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160;
+               params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA1;
+               params->result_size = SHA1_DIGEST_SIZE;
+               break;
+       case SHA224_DIGEST_SIZE:
+               params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+               params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA224;
+               params->result_size = SHA256_DIGEST_SIZE;
+               break;
+       case SHA256_DIGEST_SIZE:
+               params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+               params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA256;
+               params->result_size = SHA256_DIGEST_SIZE;
+               break;
+       case SHA384_DIGEST_SIZE:
+               params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512;
+               params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_384;
+               params->result_size = SHA512_DIGEST_SIZE;
+               break;
+       case SHA512_DIGEST_SIZE:
+               params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512;
+               params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_512;
+               params->result_size = SHA512_DIGEST_SIZE;
+               break;
+       default:
+               pr_err("chcr : ERROR, unsupported digest size\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static inline int
+write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx,
+                           struct sk_buff *skb, unsigned int *frags, char *bfr,
+                           u8 bfr_len)
+{
+       void *page_ptr = NULL;
+
+       skb->len += bfr_len;
+       skb->data_len += bfr_len;
+       skb->truesize += bfr_len;
+       page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA);
+       if (!page_ptr)
+               return -ENOMEM;
+       get_page(virt_to_page(page_ptr));
+       req_ctx->dummy_payload_ptr = page_ptr;
+       memcpy(page_ptr, bfr, bfr_len);
+       skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr),
+                          offset_in_page(page_ptr), bfr_len);
+       (*frags)++;
+       return 0;
+}
+
+/**
+ *     create_final_hash_wr - Create hash work request
+ *     @req: ahash request
+ *     @param: hash work request parameters
+ */
+static struct sk_buff *create_final_hash_wr(struct ahash_request *req,
+                                           struct hash_wr_param *param)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+       struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+       struct sk_buff *skb = NULL;
+       struct _key_ctx *key_ctx;
+       struct fw_crypto_lookaside_wr *wreq;
+       struct cpl_tx_sec_pdu *sec_cpl;
+       unsigned int frags = 0, transhdr_len, iopad_alignment = 0;
+       unsigned int digestsize = crypto_ahash_digestsize(tfm);
+       unsigned int kctx_len = sizeof(*key_ctx);
+       u8 hash_size_in_response = 0;
+
+       iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
+       kctx_len += param->alg_prm.result_size + iopad_alignment;
+       if (param->opad_needed)
+               kctx_len += param->alg_prm.result_size + iopad_alignment;
+
+       if (req_ctx->result)
+               hash_size_in_response = digestsize;
+       else
+               hash_size_in_response = param->alg_prm.result_size;
+       transhdr_len = HASH_TRANSHDR_SIZE(kctx_len);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
+                       GFP_ATOMIC);
+       if (!skb)
+               return skb;
+
+       skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+       wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
+       memset(wreq, 0, transhdr_len);
+
+       sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
+       sec_cpl->op_ivinsrtofst =
+               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0);
+       sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len);
+
+       sec_cpl->aadstart_cipherstop_hi =
+               FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0);
+       sec_cpl->cipherstop_lo_authinsert =
+               FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0);
+       sec_cpl->seqno_numivs =
+               FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode,
+                                        param->opad_needed, 0, 0);
+
+       sec_cpl->ivgen_hdrlen =
+               FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0);
+
+       key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
+       memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size);
+
+       if (param->opad_needed)
+               memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 :
+                                      CHCR_HASH_MAX_DIGEST_SIZE),
+                      hmacctx->opad, param->alg_prm.result_size);
+
+       key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY,
+                                           param->alg_prm.mk_size, 0,
+                                           param->opad_needed,
+                                           (kctx_len >> 4));
+       sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1);
+
+       skb_set_transport_header(skb, transhdr_len);
+       if (param->bfr_len != 0)
+               write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr,
+                                           param->bfr_len);
+       if (param->sg_len != 0)
+               write_sg_data_page_desc(skb, &frags, req->src, param->sg_len);
+
+       create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response,
+                   0);
+       req_ctx->skb = skb;
+       skb_get(skb);
+       return skb;
+}
+
+static int chcr_ahash_update(struct ahash_request *req)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+       struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+       struct uld_ctx *u_ctx = NULL;
+       struct sk_buff *skb;
+       u8 remainder = 0, bs;
+       unsigned int nbytes = req->nbytes;
+       struct hash_wr_param params;
+
+       bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+       u_ctx = ULD_CTX(ctx);
+       if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                           ctx->tx_channel_id))) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       if (nbytes + req_ctx->bfr_len >= bs) {
+               remainder = (nbytes + req_ctx->bfr_len) % bs;
+               nbytes = nbytes + req_ctx->bfr_len - remainder;
+       } else {
+               sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr +
+                                  req_ctx->bfr_len, nbytes, 0);
+               req_ctx->bfr_len += nbytes;
+               return 0;
+       }
+
+       params.opad_needed = 0;
+       params.more = 1;
+       params.last = 0;
+       params.sg_len = nbytes - req_ctx->bfr_len;
+       params.bfr_len = req_ctx->bfr_len;
+       params.scmd1 = 0;
+       get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+       req_ctx->result = 0;
+       req_ctx->data_len += params.sg_len + params.bfr_len;
+       skb = create_final_hash_wr(req, &params);
+       if (!skb)
+               return -ENOMEM;
+
+       req_ctx->bfr_len = remainder;
+       if (remainder)
+               sg_pcopy_to_buffer(req->src, sg_nents(req->src),
+                                  req_ctx->bfr, remainder, req->nbytes -
+                                  remainder);
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+
+       return -EINPROGRESS;
+}
+
+static void create_last_hash_block(char *bfr_ptr, unsigned int bs, u64 scmd1)
+{
+       memset(bfr_ptr, 0, bs);
+       *bfr_ptr = 0x80;
+       if (bs == 64)
+               *(__be64 *)(bfr_ptr + 56) = cpu_to_be64(scmd1  << 3);
+       else
+               *(__be64 *)(bfr_ptr + 120) =  cpu_to_be64(scmd1  << 3);
+}
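
Worked example of the final block built above, for a 64-byte-block hash (SHA-1/224/256) with scmd1 = 3 bytes of total data:

	/*
	 *   bfr[0]      = 0x80                      mandatory pad marker
	 *   bfr[1..55]  = 0x00
	 *   bfr[56..63] = be64(3 << 3) = be64(24)   message length in bits
	 *
	 * 128-byte-block hashes (SHA-384/512) put the length at offset 120
	 * instead, as the else branch shows.
	 */
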
+
+static int chcr_ahash_final(struct ahash_request *req)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+       struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+       struct hash_wr_param params;
+       struct sk_buff *skb;
+       struct uld_ctx *u_ctx = NULL;
+       u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+       u_ctx = ULD_CTX(ctx);
+       if (is_hmac(crypto_ahash_tfm(rtfm)))
+               params.opad_needed = 1;
+       else
+               params.opad_needed = 0;
+       params.sg_len = 0;
+       get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+       req_ctx->result = 1;
+       params.bfr_len = req_ctx->bfr_len;
+       req_ctx->data_len += params.bfr_len + params.sg_len;
+       if (req_ctx->bfr && (req_ctx->bfr_len == 0)) {
+               create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+               params.last = 0;
+               params.more = 1;
+               params.scmd1 = 0;
+               params.bfr_len = bs;
+
+       } else {
+               params.scmd1 = req_ctx->data_len;
+               params.last = 1;
+               params.more = 0;
+       }
+       skb = create_final_hash_wr(req, &params);
+       if (!skb)
+               return -ENOMEM;
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+       return -EINPROGRESS;
+}
+
+static int chcr_ahash_finup(struct ahash_request *req)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+       struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+       struct uld_ctx *u_ctx = NULL;
+       struct sk_buff *skb;
+       struct hash_wr_param params;
+       u8  bs;
+
+       bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+       u_ctx = ULD_CTX(ctx);
+
+       if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                           ctx->tx_channel_id))) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       if (is_hmac(crypto_ahash_tfm(rtfm)))
+               params.opad_needed = 1;
+       else
+               params.opad_needed = 0;
+
+       params.sg_len = req->nbytes;
+       params.bfr_len = req_ctx->bfr_len;
+       get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+       req_ctx->data_len += params.bfr_len + params.sg_len;
+       req_ctx->result = 1;
+       if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) {
+               create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+               params.last = 0;
+               params.more = 1;
+               params.scmd1 = 0;
+               params.bfr_len = bs;
+       } else {
+               params.scmd1 = req_ctx->data_len;
+               params.last = 1;
+               params.more = 0;
+       }
+
+       skb = create_final_hash_wr(req, &params);
+       if (!skb)
+               return -ENOMEM;
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+
+       return -EINPROGRESS;
+}
+
+static int chcr_ahash_digest(struct ahash_request *req)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+       struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+       struct uld_ctx *u_ctx = NULL;
+       struct sk_buff *skb;
+       struct hash_wr_param params;
+       u8  bs;
+
+       rtfm->init(req);
+       bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+       u_ctx = ULD_CTX(ctx);
+       if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                           ctx->tx_channel_id))) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       if (is_hmac(crypto_ahash_tfm(rtfm)))
+               params.opad_needed = 1;
+       else
+               params.opad_needed = 0;
+
+       params.last = 0;
+       params.more = 0;
+       params.sg_len = req->nbytes;
+       params.bfr_len = 0;
+       params.scmd1 = 0;
+       get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+       req_ctx->result = 1;
+       req_ctx->data_len += params.bfr_len + params.sg_len;
+
+       if (req_ctx->bfr && req->nbytes == 0) {
+               create_last_hash_block(req_ctx->bfr, bs, 0);
+               params.more = 1;
+               params.bfr_len = bs;
+       }
+
+       skb = create_final_hash_wr(req, &params);
+       if (!skb)
+               return -ENOMEM;
+
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+       return -EINPROGRESS;
+}
+
+static int chcr_ahash_export(struct ahash_request *areq, void *out)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct chcr_ahash_req_ctx *state = out;
+
+       state->bfr_len = req_ctx->bfr_len;
+       state->data_len = req_ctx->data_len;
+       memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+       memcpy(state->partial_hash, req_ctx->partial_hash,
+              CHCR_HASH_MAX_DIGEST_SIZE);
+       return 0;
+}
+
+static int chcr_ahash_import(struct ahash_request *areq, const void *in)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in;
+
+       req_ctx->bfr_len = state->bfr_len;
+       req_ctx->data_len = state->data_len;
+       req_ctx->dummy_payload_ptr = NULL;
+       memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+       memcpy(req_ctx->partial_hash, state->partial_hash,
+              CHCR_HASH_MAX_DIGEST_SIZE);
+       return 0;
+}
+
+static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
+                            unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+       struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+       unsigned int digestsize = crypto_ahash_digestsize(tfm);
+       unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+       unsigned int i, err = 0, updated_digestsize;
+
+       /*
+        * Use the key to calculate the ipad and opad. The ipad will be
+        * sent with the first request's data and the opad with the final
+        * hash result. They are kept in hmacctx->ipad and hmacctx->opad
+        * respectively.
+        */
+       if (!hmacctx->desc)
+               return -EINVAL;
+       if (keylen > bs) {
+               err = crypto_shash_digest(hmacctx->desc, key, keylen,
+                                         hmacctx->ipad);
+               if (err)
+                       goto out;
+               keylen = digestsize;
+       } else {
+               memcpy(hmacctx->ipad, key, keylen);
+       }
+       memset(hmacctx->ipad + keylen, 0, bs - keylen);
+       memcpy(hmacctx->opad, hmacctx->ipad, bs);
+
+       for (i = 0; i < bs / sizeof(int); i++) {
+               *((unsigned int *)(&hmacctx->ipad) + i) ^= IPAD_DATA;
+               *((unsigned int *)(&hmacctx->opad) + i) ^= OPAD_DATA;
+       }
+
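+       /*
+        * SHA-224 and SHA-384 are truncated variants: the engine operates
+        * on the full SHA-256/SHA-512 internal state, so reorder the ipad
+        * and opad partial hashes at the untruncated state size.
+        */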
+       updated_digestsize = digestsize;
+       if (digestsize == SHA224_DIGEST_SIZE)
+               updated_digestsize = SHA256_DIGEST_SIZE;
+       else if (digestsize == SHA384_DIGEST_SIZE)
+               updated_digestsize = SHA512_DIGEST_SIZE;
+       err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad,
+                                       hmacctx->ipad, digestsize);
+       if (err)
+               goto out;
+       chcr_change_order(hmacctx->ipad, updated_digestsize);
+
+       err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad,
+                                       hmacctx->opad, digestsize);
+       if (err)
+               goto out;
+       chcr_change_order(hmacctx->opad, updated_digestsize);
+out:
+       return err;
+}
+
+static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                              unsigned int key_len)
+{
+       struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+       struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+       int status = 0;
+       unsigned short context_size = 0;
+
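+       /*
+        * XTS uses two concatenated AES keys, so key_len is twice the
+        * single-key size; note 2 * AES_KEYSIZE_128 == AES_KEYSIZE_256
+        * bytes, which is why the ternary below selects the 128-bit
+        * cipher key size for that length.
+        */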
+       if ((key_len == (AES_KEYSIZE_128 << 1)) ||
+           (key_len == (AES_KEYSIZE_256 << 1))) {
+               memcpy(ablkctx->key, key, key_len);
+               ablkctx->enckey_len = key_len;
+               context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4;
+               ablkctx->key_ctx_hdr =
+                       FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ?
+                                        CHCR_KEYCTX_CIPHER_KEY_SIZE_128 :
+                                        CHCR_KEYCTX_CIPHER_KEY_SIZE_256,
+                                        CHCR_KEYCTX_NO_KEY, 1,
+                                        0, context_size);
+               ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
+       } else {
+               crypto_tfm_set_flags((struct crypto_tfm *)tfm,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               ablkctx->enckey_len = 0;
+               status = -EINVAL;
+       }
+       return status;
+}
+
+static int chcr_sha_init(struct ahash_request *areq)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       int digestsize =  crypto_ahash_digestsize(tfm);
+
+       req_ctx->data_len = 0;
+       req_ctx->dummy_payload_ptr = NULL;
+       req_ctx->bfr_len = 0;
+       req_ctx->skb = NULL;
+       req_ctx->result = 0;
+       copy_hash_init_values(req_ctx->partial_hash, digestsize);
+       return 0;
+}
+
+static int chcr_sha_cra_init(struct crypto_tfm *tfm)
+{
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct chcr_ahash_req_ctx));
+       return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static int chcr_hmac_init(struct ahash_request *areq)
+{
+       struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+       struct crypto_ahash *rtfm = crypto_ahash_reqtfm(areq);
+       struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+       struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+       unsigned int digestsize = crypto_ahash_digestsize(rtfm);
+       unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+       chcr_sha_init(areq);
+       req_ctx->data_len = bs;
+       if (is_hmac(crypto_ahash_tfm(rtfm))) {
+               if (digestsize == SHA224_DIGEST_SIZE)
+                       memcpy(req_ctx->partial_hash, hmacctx->ipad,
+                              SHA256_DIGEST_SIZE);
+               else if (digestsize == SHA384_DIGEST_SIZE)
+                       memcpy(req_ctx->partial_hash, hmacctx->ipad,
+                              SHA512_DIGEST_SIZE);
+               else
+                       memcpy(req_ctx->partial_hash, hmacctx->ipad,
+                              digestsize);
+       }
+       return 0;
+}
+
+static int chcr_hmac_cra_init(struct crypto_tfm *tfm)
+{
+       struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+       struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+       unsigned int digestsize =
+               crypto_ahash_digestsize(__crypto_ahash_cast(tfm));
+
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct chcr_ahash_req_ctx));
+       hmacctx->desc = chcr_alloc_shash(digestsize);
+       if (IS_ERR(hmacctx->desc))
+               return PTR_ERR(hmacctx->desc);
+       return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static void chcr_free_shash(struct shash_desc *desc)
+{
+       crypto_free_shash(desc->tfm);
+       kfree(desc);
+}
+
+static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
+{
+       struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+       struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+
+       if (hmacctx->desc) {
+               chcr_free_shash(hmacctx->desc);
+               hmacctx->desc = NULL;
+       }
+}
+
+static struct chcr_alg_template driver_algs[] = {
+       /* AES-CBC */
+       {
+               .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .is_registered = 0,
+               .alg.crypto = {
+                       .cra_name               = "cbc(aes)",
+                       .cra_driver_name        = "cbc(aes-chcr)",
+                       .cra_priority           = CHCR_CRA_PRIORITY,
+                       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
+                               CRYPTO_ALG_ASYNC,
+                       .cra_blocksize          = AES_BLOCK_SIZE,
+                       .cra_ctxsize            = sizeof(struct chcr_context)
+                               + sizeof(struct ablk_ctx),
+                       .cra_alignmask          = 0,
+                       .cra_type               = &crypto_ablkcipher_type,
+                       .cra_module             = THIS_MODULE,
+                       .cra_init               = chcr_cra_init,
+                       .cra_exit               = NULL,
+                       .cra_u.ablkcipher       = {
+                               .min_keysize    = AES_MIN_KEY_SIZE,
+                               .max_keysize    = AES_MAX_KEY_SIZE,
+                               .ivsize         = AES_BLOCK_SIZE,
+                               .setkey                 = chcr_aes_cbc_setkey,
+                               .encrypt                = chcr_aes_encrypt,
+                               .decrypt                = chcr_aes_decrypt,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .is_registered = 0,
+               .alg.crypto =   {
+                       .cra_name               = "xts(aes)",
+                       .cra_driver_name        = "xts(aes-chcr)",
+                       .cra_priority           = CHCR_CRA_PRIORITY,
+                       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
+                               CRYPTO_ALG_ASYNC,
+                       .cra_blocksize          = AES_BLOCK_SIZE,
+                       .cra_ctxsize            = sizeof(struct chcr_context) +
+                               sizeof(struct ablk_ctx),
+                       .cra_alignmask          = 0,
+                       .cra_type               = &crypto_ablkcipher_type,
+                       .cra_module             = THIS_MODULE,
+                       .cra_init               = chcr_cra_init,
+                       .cra_exit               = NULL,
+                       .cra_u = {
+                               .ablkcipher = {
+                                       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+                                       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+                                       .ivsize         = AES_BLOCK_SIZE,
+                                       .setkey         = chcr_aes_xts_setkey,
+                                       .encrypt        = chcr_aes_encrypt,
+                                       .decrypt        = chcr_aes_decrypt,
+                               }
+                       }
+               }
+       },
+       /* SHA */
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA1_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha1",
+                               .cra_driver_name = "sha1-chcr",
+                               .cra_blocksize = SHA1_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA256_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha256",
+                               .cra_driver_name = "sha256-chcr",
+                               .cra_blocksize = SHA256_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA224_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha224",
+                               .cra_driver_name = "sha224-chcr",
+                               .cra_blocksize = SHA224_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA384_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha384",
+                               .cra_driver_name = "sha384-chcr",
+                               .cra_blocksize = SHA384_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AHASH,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA512_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "sha512",
+                               .cra_driver_name = "sha512-chcr",
+                               .cra_blocksize = SHA512_BLOCK_SIZE,
+                       }
+               }
+       },
+       /* HMAC */
+       {
+               .type = CRYPTO_ALG_TYPE_HMAC,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA1_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "hmac(sha1)",
+                               .cra_driver_name = "hmac(sha1-chcr)",
+                               .cra_blocksize = SHA1_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_HMAC,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA224_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "hmac(sha224)",
+                               .cra_driver_name = "hmac(sha224-chcr)",
+                               .cra_blocksize = SHA224_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_HMAC,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA256_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "hmac(sha256)",
+                               .cra_driver_name = "hmac(sha256-chcr)",
+                               .cra_blocksize = SHA256_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_HMAC,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA384_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "hmac(sha384)",
+                               .cra_driver_name = "hmac(sha384-chcr)",
+                               .cra_blocksize = SHA384_BLOCK_SIZE,
+                       }
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_HMAC,
+               .is_registered = 0,
+               .alg.hash = {
+                       .halg.digestsize = SHA512_DIGEST_SIZE,
+                       .halg.base = {
+                               .cra_name = "hmac(sha512)",
+                               .cra_driver_name = "hmac(sha512-chcr)",
+                               .cra_blocksize = SHA512_BLOCK_SIZE,
+                       }
+               }
+       },
+};
+
+/*
+ *     chcr_unregister_alg - Deregister crypto algorithms from the
+ *     kernel crypto framework.
+ */
+static int chcr_unregister_alg(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+               switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       if (driver_algs[i].is_registered)
+                               crypto_unregister_alg(
+                                               &driver_algs[i].alg.crypto);
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       if (driver_algs[i].is_registered)
+                               crypto_unregister_ahash(
+                                               &driver_algs[i].alg.hash);
+                       break;
+               }
+               driver_algs[i].is_registered = 0;
+       }
+       return 0;
+}
+
+#define SZ_AHASH_CTX sizeof(struct chcr_context)
+#define SZ_AHASH_H_CTX (sizeof(struct chcr_context) + sizeof(struct hmac_ctx))
+#define SZ_AHASH_REQ_CTX sizeof(struct chcr_ahash_req_ctx)
+#define AHASH_CRA_FLAGS (CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC)
+
+/*
+ *     chcr_register_alg - Register crypto algorithms with kernel framework.
+ */
+static int chcr_register_alg(void)
+{
+       struct crypto_alg ai;
+       struct ahash_alg *a_hash;
+       int err = 0, i;
+       char *name = NULL;
+
+       for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+               if (driver_algs[i].is_registered)
+                       continue;
+               switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       err = crypto_register_alg(&driver_algs[i].alg.crypto);
+                       name = driver_algs[i].alg.crypto.cra_driver_name;
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       a_hash = &driver_algs[i].alg.hash;
+                       a_hash->update = chcr_ahash_update;
+                       a_hash->final = chcr_ahash_final;
+                       a_hash->finup = chcr_ahash_finup;
+                       a_hash->digest = chcr_ahash_digest;
+                       a_hash->export = chcr_ahash_export;
+                       a_hash->import = chcr_ahash_import;
+                       a_hash->halg.statesize = SZ_AHASH_REQ_CTX;
+                       a_hash->halg.base.cra_priority = CHCR_CRA_PRIORITY;
+                       a_hash->halg.base.cra_module = THIS_MODULE;
+                       a_hash->halg.base.cra_flags = AHASH_CRA_FLAGS;
+                       a_hash->halg.base.cra_alignmask = 0;
+                       a_hash->halg.base.cra_exit = NULL;
+                       a_hash->halg.base.cra_type = &crypto_ahash_type;
+
+                       if (driver_algs[i].type == CRYPTO_ALG_TYPE_HMAC) {
+                               a_hash->halg.base.cra_init = chcr_hmac_cra_init;
+                               a_hash->halg.base.cra_exit = chcr_hmac_cra_exit;
+                               a_hash->init = chcr_hmac_init;
+                               a_hash->setkey = chcr_ahash_setkey;
+                               a_hash->halg.base.cra_ctxsize = SZ_AHASH_H_CTX;
+                       } else {
+                               a_hash->init = chcr_sha_init;
+                               a_hash->halg.base.cra_ctxsize = SZ_AHASH_CTX;
+                               a_hash->halg.base.cra_init = chcr_sha_cra_init;
+                       }
+                       err = crypto_register_ahash(&driver_algs[i].alg.hash);
+                       ai = driver_algs[i].alg.hash.halg.base;
+                       name = ai.cra_driver_name;
+                       break;
+               }
+               if (err) {
+                       pr_err("chcr : %s : Algorithm registration failed\n",
+                              name);
+                       goto register_err;
+               } else {
+                       driver_algs[i].is_registered = 1;
+               }
+       }
+       return 0;
+
+register_err:
+       chcr_unregister_alg();
+       return err;
+}
+
+/*
+ *     start_crypto - Register the crypto algorithms.
+ *     This should be called once, when the first device comes up. After
+ *     this the kernel will start calling driver APIs for crypto operations.
+ */
+int start_crypto(void)
+{
+       return chcr_register_alg();
+}
+
+/*
+ *     stop_crypto - Deregister all the crypto algorithms from the kernel.
+ *     This should be called once, when the last device goes down. After
+ *     this the kernel will not call the driver APIs for crypto operations.
+ */
+int stop_crypto(void)
+{
+       chcr_unregister_alg();
+       return 0;
+}
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
new file mode 100644 (file)
index 0000000..ec64fbc
--- /dev/null
@@ -0,0 +1,471 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_ALGO_H__
+#define __CHCR_ALGO_H__
+
+/* Crypto key context */
+#define KEY_CONTEXT_CTX_LEN_S           24
+#define KEY_CONTEXT_CTX_LEN_M           0xff
+#define KEY_CONTEXT_CTX_LEN_V(x)        ((x) << KEY_CONTEXT_CTX_LEN_S)
+#define KEY_CONTEXT_CTX_LEN_G(x) \
+       (((x) >> KEY_CONTEXT_CTX_LEN_S) & KEY_CONTEXT_CTX_LEN_M)
+
+#define KEY_CONTEXT_DUAL_CK_S      12
+#define KEY_CONTEXT_DUAL_CK_M      0x1
+#define KEY_CONTEXT_DUAL_CK_V(x)   ((x) << KEY_CONTEXT_DUAL_CK_S)
+#define KEY_CONTEXT_DUAL_CK_G(x)   \
+(((x) >> KEY_CONTEXT_DUAL_CK_S) & KEY_CONTEXT_DUAL_CK_M)
+#define KEY_CONTEXT_DUAL_CK_F      KEY_CONTEXT_DUAL_CK_V(1U)
+
+#define KEY_CONTEXT_SALT_PRESENT_S      10
+#define KEY_CONTEXT_SALT_PRESENT_M      0x1
+#define KEY_CONTEXT_SALT_PRESENT_V(x)   ((x) << KEY_CONTEXT_SALT_PRESENT_S)
+#define KEY_CONTEXT_SALT_PRESENT_G(x)   \
+       (((x) >> KEY_CONTEXT_SALT_PRESENT_S) & \
+        KEY_CONTEXT_SALT_PRESENT_M)
+#define KEY_CONTEXT_SALT_PRESENT_F      KEY_CONTEXT_SALT_PRESENT_V(1U)
+
+#define KEY_CONTEXT_VALID_S     0
+#define KEY_CONTEXT_VALID_M     0x1
+#define KEY_CONTEXT_VALID_V(x)  ((x) << KEY_CONTEXT_VALID_S)
+#define KEY_CONTEXT_VALID_G(x)  \
+       (((x) >> KEY_CONTEXT_VALID_S) & \
+        KEY_CONTEXT_VALID_M)
+#define KEY_CONTEXT_VALID_F     KEY_CONTEXT_VALID_V(1U)
+
+#define KEY_CONTEXT_CK_SIZE_S           6
+#define KEY_CONTEXT_CK_SIZE_M           0xf
+#define KEY_CONTEXT_CK_SIZE_V(x)        ((x) << KEY_CONTEXT_CK_SIZE_S)
+#define KEY_CONTEXT_CK_SIZE_G(x)        \
+       (((x) >> KEY_CONTEXT_CK_SIZE_S) & KEY_CONTEXT_CK_SIZE_M)
+
+#define KEY_CONTEXT_MK_SIZE_S           2
+#define KEY_CONTEXT_MK_SIZE_M           0xf
+#define KEY_CONTEXT_MK_SIZE_V(x)        ((x) << KEY_CONTEXT_MK_SIZE_S)
+#define KEY_CONTEXT_MK_SIZE_G(x)        \
+       (((x) >> KEY_CONTEXT_MK_SIZE_S) & KEY_CONTEXT_MK_SIZE_M)
+
+#define KEY_CONTEXT_OPAD_PRESENT_S      11
+#define KEY_CONTEXT_OPAD_PRESENT_M      0x1
+#define KEY_CONTEXT_OPAD_PRESENT_V(x)   ((x) << KEY_CONTEXT_OPAD_PRESENT_S)
+#define KEY_CONTEXT_OPAD_PRESENT_G(x)   \
+       (((x) >> KEY_CONTEXT_OPAD_PRESENT_S) & \
+        KEY_CONTEXT_OPAD_PRESENT_M)
+#define KEY_CONTEXT_OPAD_PRESENT_F      KEY_CONTEXT_OPAD_PRESENT_V(1U)
+
+#define CHCR_HASH_MAX_DIGEST_SIZE 64
+#define CHCR_MAX_SHA_DIGEST_SIZE 64
+
+#define IPSEC_TRUNCATED_ICV_SIZE 12
+#define TLS_TRUNCATED_HMAC_SIZE 10
+#define CBCMAC_DIGEST_SIZE 16
+#define MAX_HASH_NAME 20
+
+#define SHA1_INIT_STATE_5X4B    5
+#define SHA256_INIT_STATE_8X4B  8
+#define SHA512_INIT_STATE_8X8B  8
+#define SHA1_INIT_STATE         SHA1_INIT_STATE_5X4B
+#define SHA224_INIT_STATE       SHA256_INIT_STATE_8X4B
+#define SHA256_INIT_STATE       SHA256_INIT_STATE_8X4B
+#define SHA384_INIT_STATE       SHA512_INIT_STATE_8X8B
+#define SHA512_INIT_STATE       SHA512_INIT_STATE_8X8B
+
+#define DUMMY_BYTES 16
+
+#define IPAD_DATA 0x36363636
+#define OPAD_DATA 0x5c5c5c5c
+
+#define TRANSHDR_SIZE(alignedkctx_len)\
+       (sizeof(struct ulptx_idata) +\
+        sizeof(struct ulp_txpkt) +\
+        sizeof(struct fw_crypto_lookaside_wr) +\
+        sizeof(struct cpl_tx_sec_pdu) +\
+        (alignedkctx_len))
+#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \
+       (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\
+        sizeof(struct cpl_rx_phys_dsgl))
+#define HASH_TRANSHDR_SIZE(alignedkctx_len)\
+       (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES)
+
+#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \
+                       sizeof(struct ulp_txpkt) + \
+                       sizeof(struct ulptx_idata))
+
+#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst)      \
+       htonl( \
+              CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \
+              CPL_TX_SEC_PDU_RXCHID_V((id)) | \
+              CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \
+              CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \
+              CPL_TX_SEC_PDU_CPLLEN_V((len)) | \
+              CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \
+              CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst)))
+
+#define  FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \
+       htonl( \
+              CPL_TX_SEC_PDU_AADSTART_V((a_start)) | \
+              CPL_TX_SEC_PDU_AADSTOP_V((a_stop)) | \
+              CPL_TX_SEC_PDU_CIPHERSTART_V((c_start)) | \
+              CPL_TX_SEC_PDU_CIPHERSTOP_HI_V((c_stop_hi)))
+
+#define  FILL_SEC_CPL_AUTHINSERT(c_stop_lo, a_start, a_stop, a_inst) \
+       htonl( \
+              CPL_TX_SEC_PDU_CIPHERSTOP_LO_V((c_stop_lo)) | \
+               CPL_TX_SEC_PDU_AUTHSTART_V((a_start)) | \
+               CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \
+               CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst)))
+
+#define  FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs)  \
+               htonl( \
+               SCMD_SEQ_NO_CTRL_V(0) | \
+               SCMD_STATUS_PRESENT_V(0) | \
+               SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | \
+               SCMD_ENC_DEC_CTRL_V((ctrl)) | \
+               SCMD_CIPH_AUTH_SEQ_CTRL_V((seq)) | \
+               SCMD_CIPH_MODE_V((cmode)) | \
+               SCMD_AUTH_MODE_V((amode)) | \
+               SCMD_HMAC_CTRL_V((opad)) | \
+               SCMD_IV_SIZE_V((size)) | \
+               SCMD_NUM_IVS_V((nivs)))
+
+#define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \
+               SCMD_ENB_DBGID_V(0) | \
+               SCMD_IV_GEN_CTRL_V(0) | \
+               SCMD_LAST_FRAG_V((last)) | \
+               SCMD_MORE_FRAGS_V((more)) | \
+               SCMD_TLS_COMPPDU_V(0) | \
+               SCMD_KEY_CTX_INLINE_V((ctx_in)) | \
+               SCMD_TLS_FRAG_ENABLE_V(0) | \
+               SCMD_MAC_ONLY_V((mac)) |  \
+               SCMD_AADIVDROP_V((ivdrop)) | \
+               SCMD_HDR_LEN_V((len)))
+
+#define  FILL_KEY_CTX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \
+               htonl(KEY_CONTEXT_VALID_V(1) | \
+                     KEY_CONTEXT_CK_SIZE_V((ck_size)) | \
+                     KEY_CONTEXT_MK_SIZE_V(mk_size) | \
+                     KEY_CONTEXT_DUAL_CK_V((d_ck)) | \
+                     KEY_CONTEXT_OPAD_PRESENT_V((opad)) | \
+                     KEY_CONTEXT_SALT_PRESENT_V(1) | \
+                     KEY_CONTEXT_CTX_LEN_V((ctx_len)))
+
+#define FILL_WR_OP_CCTX_SIZE(len, ctx_len) \
+               htonl( \
+                       FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \
+                       FW_CRYPTO_LOOKASIDE_WR) | \
+                       FW_CRYPTO_LOOKASIDE_WR_COMPL_V(0) | \
+                       FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V((len)) | \
+                       FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \
+                       FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len)))
+
+#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \
+               htonl( \
+                       FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \
+                       FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \
+                       FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \
+                       FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)))
+
+#define FILL_ULPTX_CMD_DEST(cid) \
+       htonl(ULPTX_CMD_V(ULP_TX_PKT) | \
+             ULP_TXPKT_DEST_V(0) | \
+             ULP_TXPKT_DATAMODIFY_V(0) | \
+             ULP_TXPKT_CHANNELID_V((cid)) | \
+             ULP_TXPKT_RO_V(1) | \
+             ULP_TXPKT_FID_V(0))
+
+#define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\
+                             _bs == SHA1_DIGEST_SIZE ? 12 : 0; })
+
+#define FILL_PLD_SIZE_HASH_SIZE(payload_sgl_len, sgl_lengths, total_frags) \
+       htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(payload_sgl_len ? \
+                                               sgl_lengths[total_frags] : 0) |\
+             FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(0))
+
+#define FILL_LEN_PKD(calc_tx_flits_ofld, skb) \
+       htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP((\
+                                          calc_tx_flits_ofld(skb) * 8), 16)))
+
+#define FILL_CMD_MORE(immdatalen) htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) |\
+                                       ULP_TX_SC_MORE_V((immdatalen) ? 0 : 1))
+
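+/* MAX_NK: maximum number of 32-bit key words (Nk), i.e. 8 for AES-256 */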
+#define MAX_NK 8
+#define CRYPTO_MAX_IMM_TX_PKT_LEN 256
+
+struct algo_param {
+       unsigned int auth_mode;
+       unsigned int mk_size;
+       unsigned int result_size;
+};
+
+struct hash_wr_param {
+       unsigned int opad_needed;
+       unsigned int more;
+       unsigned int last;
+       struct algo_param alg_prm;
+       unsigned int sg_len;
+       unsigned int bfr_len;
+       u64 scmd1;
+};
+
+enum {
+       AES_KEYLENGTH_128BIT = 128,
+       AES_KEYLENGTH_192BIT = 192,
+       AES_KEYLENGTH_256BIT = 256
+};
+
+enum {
+       KEYLENGTH_3BYTES = 3,
+       KEYLENGTH_4BYTES = 4,
+       KEYLENGTH_6BYTES = 6,
+       KEYLENGTH_8BYTES = 8
+};
+
+enum {
+       NUMBER_OF_ROUNDS_10 = 10,
+       NUMBER_OF_ROUNDS_12 = 12,
+       NUMBER_OF_ROUNDS_14 = 14,
+};
+
+/*
+ * CCM defines values of 4, 6, 8, 10, 12, 14, and 16 octets,
+ * where they indicate the size of the integrity check value (ICV)
+ */
+enum {
+       AES_CCM_ICV_4   = 4,
+       AES_CCM_ICV_6   = 6,
+       AES_CCM_ICV_8   = 8,
+       AES_CCM_ICV_10  = 10,
+       AES_CCM_ICV_12  = 12,
+       AES_CCM_ICV_14  = 14,
+       AES_CCM_ICV_16 = 16
+};
+
+struct hash_op_params {
+       unsigned char mk_size;
+       unsigned char pad_align;
+       unsigned char auth_mode;
+       char hash_name[MAX_HASH_NAME];
+       unsigned short block_size;
+       unsigned short word_size;
+       unsigned short ipad_size;
+};
+
+struct phys_sge_pairs {
+       __be16 len[8];
+       __be64 addr[8];
+};
+
+struct phys_sge_parm {
+       unsigned int nents;
+       unsigned int obsize;
+       unsigned short qid;
+       unsigned char align;
+};
+
+struct crypto_result {
+       struct completion completion;
+       int err;
+};
+
+static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
+               SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
+};
+
+static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
+               SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+               SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+};
+
+static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
+               SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+               SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+};
+
+static const u64 sha384_init[SHA512_DIGEST_SIZE / 8] = {
+               SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3,
+               SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7,
+};
+
+static const u64 sha512_init[SHA512_DIGEST_SIZE / 8] = {
+               SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3,
+               SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7,
+};
+
+static inline void copy_hash_init_values(char *key, int digestsize)
+{
+       u8 i;
+       __be32 *dkey = (__be32 *)key;
+       u64 *ldkey = (u64 *)key;
+       __be64 *sha384 = (__be64 *)sha384_init;
+       __be64 *sha512 = (__be64 *)sha512_init;
+
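+       /* The 32-bit init states are stored big-endian via cpu_to_be32();
+        * for SHA-384/512 the host-order 64-bit constants are byte-swapped
+        * through the __be64 casts above.
+        */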
+       switch (digestsize) {
+       case SHA1_DIGEST_SIZE:
+               for (i = 0; i < SHA1_INIT_STATE; i++)
+                       dkey[i] = cpu_to_be32(sha1_init[i]);
+               break;
+       case SHA224_DIGEST_SIZE:
+               for (i = 0; i < SHA224_INIT_STATE; i++)
+                       dkey[i] = cpu_to_be32(sha224_init[i]);
+               break;
+       case SHA256_DIGEST_SIZE:
+               for (i = 0; i < SHA256_INIT_STATE; i++)
+                       dkey[i] = cpu_to_be32(sha256_init[i]);
+               break;
+       case SHA384_DIGEST_SIZE:
+               for (i = 0; i < SHA384_INIT_STATE; i++)
+                       ldkey[i] = be64_to_cpu(sha384[i]);
+               break;
+       case SHA512_DIGEST_SIZE:
+               for (i = 0; i < SHA512_INIT_STATE; i++)
+                       ldkey[i] = be64_to_cpu(sha512[i]);
+               break;
+       }
+}
+
+static const u8 sgl_lengths[20] = {
+       0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15
+};
+
+/* Size of one block of len fields: 8 fields * sizeof(__be16) = 16 bytes */
+#define PHYSDSGL_MAX_LEN_SIZE 16
+
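+/*
+ * Example: for 3 SG entries the helper below reserves one 16-byte block
+ * of len fields, 3 * 8 bytes of addresses and 8 bytes of padding to keep
+ * the address count even: 16 + 24 + 8 = 48 bytes.
+ */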
+static inline u16 get_space_for_phys_dsgl(unsigned int sgl_entr)
+{
+       /* len field size + addr field size */
+       return ((sgl_entr >> 3) + ((sgl_entr % 8) ?
+                                  1 : 0)) * PHYSDSGL_MAX_LEN_SIZE +
+               (sgl_entr << 3) + ((sgl_entr % 2 ? 1 : 0) << 3);
+}
+
+/* The AES s-transform matrix (s-box). */
+static const u8 aes_sbox[256] = {
+       99,  124, 119, 123, 242, 107, 111, 197, 48,  1,   103, 43,  254, 215,
+       171, 118, 202, 130, 201, 125, 250, 89,  71,  240, 173, 212, 162, 175,
+       156, 164, 114, 192, 183, 253, 147, 38,  54,  63,  247, 204, 52,  165,
+       229, 241, 113, 216, 49,  21, 4,   199, 35,  195, 24,  150, 5, 154, 7,
+       18,  128, 226, 235, 39,  178, 117, 9,   131, 44,  26,  27,  110, 90,
+       160, 82,  59,  214, 179, 41,  227, 47,  132, 83,  209, 0,   237, 32,
+       252, 177, 91,  106, 203, 190, 57,  74,  76,  88,  207, 208, 239, 170,
+       251, 67,  77,  51,  133, 69,  249, 2,   127, 80,  60,  159, 168, 81,
+       163, 64,  143, 146, 157, 56,  245, 188, 182, 218, 33,  16,  255, 243,
+       210, 205, 12,  19,  236, 95,  151, 68,  23,  196, 167, 126, 61,  100,
+       93,  25,  115, 96,  129, 79,  220, 34,  42,  144, 136, 70,  238, 184,
+       20,  222, 94,  11,  219, 224, 50,  58,  10,  73,  6,   36,  92,  194,
+       211, 172, 98,  145, 149, 228, 121, 231, 200, 55,  109, 141, 213, 78,
+       169, 108, 86,  244, 234, 101, 122, 174, 8, 186, 120, 37,  46,  28, 166,
+       180, 198, 232, 221, 116, 31,  75,  189, 139, 138, 112, 62,  181, 102,
+       72,  3,   246, 14,  97,  53,  87,  185, 134, 193, 29,  158, 225, 248,
+       152, 17,  105, 217, 142, 148, 155, 30,  135, 233, 206, 85,  40,  223,
+       140, 161, 137, 13,  191, 230, 66,  104, 65,  153, 45,  15,  176, 84,
+       187, 22
+};
+
+static u32 aes_ks_subword(const u32 w)
+{
+       u8 bytes[4];
+
+       *(u32 *)(&bytes[0]) = w;
+       bytes[0] = aes_sbox[bytes[0]];
+       bytes[1] = aes_sbox[bytes[1]];
+       bytes[2] = aes_sbox[bytes[2]];
+       bytes[3] = aes_sbox[bytes[3]];
+       return *(u32 *)(&bytes[0]);
+}
+
+static const u32 round_constant[11] = {
+       0x01000000, 0x02000000, 0x04000000, 0x08000000,
+       0x10000000, 0x20000000, 0x40000000, 0x80000000,
+       0x1B000000, 0x36000000, 0x6C000000
+};
+
+/* dec_key - OUTPUT - Reverse round key
+ * key - INPUT - key
+ * keylength - INPUT - length of the key in number of bits
+ */
+static inline void get_aes_decrypt_key(unsigned char *dec_key,
+                                      const unsigned char *key,
+                                      unsigned int keylength)
+{
+       u32 temp;
+       u32 w_ring[MAX_NK];
+       int i, j, k = 0;
+       u8  nr, nk;
+
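+       /*
+        * Expand the encryption key schedule forward, keeping only the
+        * last nk words in w_ring; they are then emitted in reverse order
+        * below to form the decryption round key.
+        */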
+       switch (keylength) {
+       case AES_KEYLENGTH_128BIT:
+               nk = KEYLENGTH_4BYTES;
+               nr = NUMBER_OF_ROUNDS_10;
+               break;
+
+       case AES_KEYLENGTH_192BIT:
+               nk = KEYLENGTH_6BYTES;
+               nr = NUMBER_OF_ROUNDS_12;
+               break;
+       case AES_KEYLENGTH_256BIT:
+               nk = KEYLENGTH_8BYTES;
+               nr = NUMBER_OF_ROUNDS_14;
+               break;
+       default:
+               return;
+       }
+       for (i = 0; i < nk; i++ )
+               w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]);
+
+       i = 0;
+       temp = w_ring[nk - 1];
+       while (i + nk < (nr + 1) * 4) {
+               if (!(i % nk)) {
+                       /* RotWord(temp) */
+                       temp = (temp << 8) | (temp >> 24);
+                       temp = aes_ks_subword(temp);
+                       temp ^= round_constant[i / nk];
+               } else if (nk == 8 && (i % 4 == 0)) {
+                       temp = aes_ks_subword(temp);
+               }
+               w_ring[i % nk] ^= temp;
+               temp = w_ring[i % nk];
+               i++;
+       }
+       for (k = 0, j = i % nk; k < nk; k++) {
+               *((u32 *)dec_key + k) = htonl(w_ring[j]);
+               j--;
+               if(j < 0)
+                       j += nk;
+       }
+}
+
+#endif /* __CHCR_ALGO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
new file mode 100644 (file)
index 0000000..2f6156b
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (C) 2011-2016 Chelsio Communications.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written and Maintained by:
+ * Manoj Malviya (manojmalviya@chelsio.com)
+ * Atul Gupta (atul.gupta@chelsio.com)
+ * Jitendra Lulla (jlulla@chelsio.com)
+ * Yeshaswi M R Gowda (yeshaswi@chelsio.com)
+ * Harsh Jain (harsh@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <crypto/aes.h>
+#include <crypto/hash.h>
+
+#include "t4_msg.h"
+#include "chcr_core.h"
+#include "cxgb4_uld.h"
+
+static LIST_HEAD(uld_ctx_list);
+static DEFINE_MUTEX(dev_mutex);
+static atomic_t dev_count;
+
+typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input);
+static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input);
+static void *chcr_uld_add(const struct cxgb4_lld_info *lld);
+static int chcr_uld_state_change(void *handle, enum cxgb4_state state);
+
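+/* Dispatch table mapping CPL opcodes to their response handlers. */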
+static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
+       [CPL_FW6_PLD] = cpl_fw6_pld_handler,
+};
+
+static struct cxgb4_pci_uld_info chcr_uld_info = {
+       .name = DRV_MODULE_NAME,
+       .nrxq = 4,
+       .rxq_size = 1024,
+       .nciq = 0,
+       .ciq_size = 0,
+       .add = chcr_uld_add,
+       .state_change = chcr_uld_state_change,
+       .rx_handler = chcr_uld_rx_handler,
+};
+
+int assign_chcr_device(struct chcr_dev **dev)
+{
+       struct uld_ctx *u_ctx;
+
+       /*
+        * TODO: decide which device to use when multiple devices are
+        * available, perhaps by round robin. Requests of one session must
+        * go to the same device to maintain ordering.
+        */
+       mutex_lock(&dev_mutex);
+       if (list_empty(&uld_ctx_list)) {
+               /* list_first_entry() on an empty list would return a bogus
+                * pointer, so check for emptiness explicitly.
+                */
+               mutex_unlock(&dev_mutex);
+               return -ENXIO;
+       }
+       u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry);
+
+       *dev = u_ctx->dev;
+       mutex_unlock(&dev_mutex);
+       return 0;
+}
+
+static int chcr_dev_add(struct uld_ctx *u_ctx)
+{
+       struct chcr_dev *dev;
+
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return -ENXIO;
+
+       spin_lock_init(&dev->lock_chcr_dev);
+       u_ctx->dev = dev;
+       dev->u_ctx = u_ctx;
+       atomic_inc(&dev_count);
+       return 0;
+}
+
+static int chcr_dev_remove(struct uld_ctx *u_ctx)
+{
+       kfree(u_ctx->dev);
+       u_ctx->dev = NULL;
+       atomic_dec(&dev_count);
+       return 0;
+}
+
+static int cpl_fw6_pld_handler(struct chcr_dev *dev,
+                              unsigned char *input)
+{
+       struct crypto_async_request *req;
+       struct cpl_fw6_pld *fw6_pld;
+       u32 ack_err_status = 0;
+       int error_status = 0;
+
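+       /* data[1] of the CPL echoes back the request pointer that was
+        * passed down in the work request; data[0] carries the ack error
+        * status bits checked below.
+        */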
+       fw6_pld = (struct cpl_fw6_pld *)input;
+       req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu(
+                                                   fw6_pld->data[1]);
+
+       ack_err_status =
+               ntohl(*(__be32 *)((unsigned char *)&fw6_pld->data[0] + 4));
+       if (ack_err_status) {
+               if (CHK_MAC_ERR_BIT(ack_err_status) ||
+                   CHK_PAD_ERR_BIT(ack_err_status))
+                       error_status = -EINVAL;
+       }
+       /* call the completion callback with the resulting status */
+       if (req) {
+               if (!chcr_handle_resp(req, input, error_status))
+                       req->complete(req, error_status);
+               else
+                       return -EINVAL;
+       } else {
+               pr_err("Incorrect request address from the firmware\n");
+               return -EFAULT;
+       }
+       return 0;
+}
+
+int chcr_send_wr(struct sk_buff *skb)
+{
+       return cxgb4_ofld_send(skb->dev, skb);
+}
+
+static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
+{
+       struct uld_ctx *u_ctx;
+
+       /* Create the device and add it in the device list */
+       u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL);
+       if (!u_ctx) {
+               u_ctx = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+       u_ctx->lldi = *lld;
+       mutex_lock(&dev_mutex);
+       list_add_tail(&u_ctx->entry, &uld_ctx_list);
+       mutex_unlock(&dev_mutex);
+out:
+       return u_ctx;
+}
+
+int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
+                       const struct pkt_gl *pgl)
+{
+       struct uld_ctx *u_ctx = (struct uld_ctx *)handle;
+       struct chcr_dev *dev = u_ctx->dev;
+       const struct cpl_act_establish *rpl = (struct cpl_act_establish *)rsp;
+
+       if (rpl->ot.opcode != CPL_FW6_PLD) {
+               pr_err("Unsupported opcode\n");
+               return 0;
+       }
+
+       if (!pgl)
+               work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]);
+       else
+               work_handlers[rpl->ot.opcode](dev, pgl->va);
+       return 0;
+}
+
+static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
+{
+       struct uld_ctx *u_ctx = handle;
+       int ret = 0;
+
+       switch (state) {
+       case CXGB4_STATE_UP:
+               if (!u_ctx->dev) {
+                       ret = chcr_dev_add(u_ctx);
+                       if (ret != 0)
+                               return ret;
+               }
+               if (atomic_read(&dev_count) == 1)
+                       ret = start_crypto();
+               break;
+
+       case CXGB4_STATE_DETACH:
+               if (u_ctx->dev) {
+                       mutex_lock(&dev_mutex);
+                       chcr_dev_remove(u_ctx);
+                       mutex_unlock(&dev_mutex);
+               }
+               if (!atomic_read(&dev_count))
+                       stop_crypto();
+               break;
+
+       case CXGB4_STATE_START_RECOVERY:
+       case CXGB4_STATE_DOWN:
+       default:
+               break;
+       }
+       return ret;
+}
+
+static int __init chcr_crypto_init(void)
+{
+       if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) {
+               pr_err("ULD register fail: No chcr crypto support in cxgb4");
+               return -1;
+       }
+
+       return 0;
+}
+
+static void __exit chcr_crypto_exit(void)
+{
+       struct uld_ctx *u_ctx, *tmp;
+
+       if (atomic_read(&dev_count))
+               stop_crypto();
+
+       /* Remove all devices from list */
+       mutex_lock(&dev_mutex);
+       list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) {
+               if (u_ctx->dev)
+                       chcr_dev_remove(u_ctx);
+               kfree(u_ctx);
+       }
+       mutex_unlock(&dev_mutex);
+       cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1);
+}
+
+module_init(chcr_crypto_init);
+module_exit(chcr_crypto_exit);
+
+MODULE_DESCRIPTION("Crypto Co-processor for Chelsio Terminator cards.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chelsio Communications");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
new file mode 100644 (file)
index 0000000..2a5c671
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_CORE_H__
+#define __CHCR_CORE_H__
+
+#include <crypto/algapi.h>
+#include "t4_hw.h"
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+
+#define DRV_MODULE_NAME "chcr"
+#define DRV_VERSION "1.0.0.0"
+
+#define MAX_PENDING_REQ_TO_HW 20
+#define CHCR_TEST_RESPONSE_TIMEOUT 1000
+
+#define PAD_ERROR_BIT          1
+#define CHK_PAD_ERR_BIT(x)     (((x) >> PAD_ERROR_BIT) & 1)
+
+#define MAC_ERROR_BIT          0
+#define CHK_MAC_ERR_BIT(x)     (((x) >> MAC_ERROR_BIT) & 1)
+
+struct uld_ctx;
+
+struct chcr_dev {
+       /* Request submitted to hardware and waiting for a response. */
+       spinlock_t lock_chcr_dev;
+       struct crypto_queue pending_queue;
+       struct uld_ctx *u_ctx;
+       unsigned char tx_channel_id;
+};
+
+struct uld_ctx {
+       struct list_head entry;
+       struct cxgb4_lld_info lldi;
+       struct chcr_dev *dev;
+};
+
+int assign_chcr_device(struct chcr_dev **dev);
+int chcr_send_wr(struct sk_buff *skb);
+int start_crypto(void);
+int stop_crypto(void);
+int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
+                       const struct pkt_gl *pgl);
+int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
+                    int err);
+#endif /* __CHCR_CORE_H__ */
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
new file mode 100644 (file)
index 0000000..d7d7560
--- /dev/null
@@ -0,0 +1,203 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_CRYPTO_H__
+#define __CHCR_CRYPTO_H__
+
+/* Define the following if the hardware does not drop the AAD and IV data
+ * before handing back the processed data.
+ */
+
+#define CHCR_CRA_PRIORITY 300
+
+#define CHCR_AES_MAX_KEY_LEN  (2 * (AES_MAX_KEY_SIZE)) /* consider xts */
+#define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */
+
+#define CHCR_MAX_AUTHENC_AES_KEY_LEN 32 /* max aes key length*/
+#define CHCR_MAX_AUTHENC_SHA_KEY_LEN 128 /* max sha key length*/
+
+#define CHCR_GIVENCRYPT_OP 2
+/* CPL/SCMD parameters */
+
+#define CHCR_ENCRYPT_OP 0
+#define CHCR_DECRYPT_OP 1
+
+#define CHCR_SCMD_SEQ_NO_CTRL_32BIT     1
+#define CHCR_SCMD_SEQ_NO_CTRL_48BIT     2
+#define CHCR_SCMD_SEQ_NO_CTRL_64BIT     3
+
+#define CHCR_SCMD_PROTO_VERSION_GENERIC 4
+
+#define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0
+#define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1
+
+#define CHCR_SCMD_CIPHER_MODE_NOP           0
+#define CHCR_SCMD_CIPHER_MODE_AES_CBC       1
+#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES   4
+#define CHCR_SCMD_CIPHER_MODE_AES_XTS       6
+
+#define CHCR_SCMD_AUTH_MODE_NOP             0
+#define CHCR_SCMD_AUTH_MODE_SHA1            1
+#define CHCR_SCMD_AUTH_MODE_SHA224          2
+#define CHCR_SCMD_AUTH_MODE_SHA256          3
+#define CHCR_SCMD_AUTH_MODE_SHA512_224      5
+#define CHCR_SCMD_AUTH_MODE_SHA512_256      6
+#define CHCR_SCMD_AUTH_MODE_SHA512_384      7
+#define CHCR_SCMD_AUTH_MODE_SHA512_512      8
+
+#define CHCR_SCMD_HMAC_CTRL_NOP             0
+#define CHCR_SCMD_HMAC_CTRL_NO_TRUNC        1
+
+#define CHCR_SCMD_IVGEN_CTRL_HW             0
+#define CHCR_SCMD_IVGEN_CTRL_SW             1
+/* These are not really MAC key sizes. They are the intermediate state
+ * sizes of the SHA engine.
+ */
+#define CHCR_KEYCTX_MAC_KEY_SIZE_128        0
+#define CHCR_KEYCTX_MAC_KEY_SIZE_160        1
+#define CHCR_KEYCTX_MAC_KEY_SIZE_192        2
+#define CHCR_KEYCTX_MAC_KEY_SIZE_256        3
+#define CHCR_KEYCTX_MAC_KEY_SIZE_512        4
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128     0
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_192     1
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_256     2
+#define CHCR_KEYCTX_NO_KEY                  15
+
+#define CHCR_CPL_FW4_PLD_IV_OFFSET          (5 * 64) /* bytes. flt #5 and #6 */
+#define CHCR_CPL_FW4_PLD_HASH_RESULT_OFFSET (7 * 64) /* bytes. flt #7 */
+#define CHCR_CPL_FW4_PLD_DATA_SIZE          (4 * 64) /* bytes. flt #4 to #7 */
+
+#define KEY_CONTEXT_HDR_SALT_AND_PAD       16
+#define flits_to_bytes(x)  ((x) * 8)
+
+#define IV_NOP                  0
+#define IV_IMMEDIATE            1
+#define IV_DSGL                        2
+
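+/* Driver-private sub-type bits: HMAC is registered as an AHASH with an
+ * extra sub-type flag so registration can tell the two apart.
+ */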
+#define CRYPTO_ALG_SUB_TYPE_MASK            0x0f000000
+#define CRYPTO_ALG_SUB_TYPE_HASH_HMAC       0x01000000
+#define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
+                             CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
+
+#define MAX_SALT                4
+#define MAX_SCRATCH_PAD_SIZE    32
+
+#define CHCR_HASH_MAX_BLOCK_SIZE_64  64
+#define CHCR_HASH_MAX_BLOCK_SIZE_128 128
+
+/* Aligned to 128 bit boundary */
+struct _key_ctx {
+       __be32 ctx_hdr;
+       u8 salt[MAX_SALT];
+       __be64 reserved;
+       unsigned char key[0];
+};
+
+struct ablk_ctx {
+       u8 enc;
+       unsigned int processed_len;
+       __be32 key_ctx_hdr;
+       unsigned int enckey_len;
+       unsigned int dst_nents;
+       struct scatterlist iv_sg;
+       u8 key[CHCR_AES_MAX_KEY_LEN];
+       u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
+       unsigned char ciph_mode;
+};
+
+struct hmac_ctx {
+       struct shash_desc *desc;
+       u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
+       u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128];
+};
+
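+/* The context area that follows struct chcr_context is viewed either as
+ * a hmac_ctx or as an ablk_ctx depending on the algorithm; the
+ * zero-length arrays below provide typed views of that area.
+ */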
+struct __crypto_ctx {
+       struct hmac_ctx hmacctx[0];
+       struct ablk_ctx ablkctx[0];
+};
+
+struct chcr_context {
+       struct chcr_dev *dev;
+       unsigned char tx_channel_id;
+       struct __crypto_ctx crypto_ctx[0];
+};
+
+struct chcr_ahash_req_ctx {
+       u32 result;
+       char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128];
+       u8 bfr_len;
+       /* The partial hash is DMA'd into this buffer */
+       u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE];
+       u64 data_len;  /* Total data length processed so far */
+       void *dummy_payload_ptr;
+       /* SKB which is being sent to the hardware for processing */
+       struct sk_buff *skb;
+};
+
+struct chcr_blkcipher_req_ctx {
+       struct sk_buff *skb;
+};
+
+struct chcr_alg_template {
+       u32 type;
+       u32 is_registered;
+       union {
+               struct crypto_alg crypto;
+               struct ahash_alg hash;
+       } alg;
+};
+
+struct chcr_req_ctx {
+       union {
+               struct ahash_request *ahash_req;
+               struct ablkcipher_request *ablk_req;
+       } req;
+       union {
+               struct chcr_ahash_req_ctx *ahash_ctx;
+               struct chcr_blkcipher_req_ctx *ablk_ctx;
+       } ctx;
+};
+
+struct sge_opaque_hdr {
+       void *dev;
+       dma_addr_t addr[MAX_SKB_FRAGS + 1];
+};
+
+typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req,
+                                      struct chcr_context *ctx,
+                                      unsigned short qid,
+                                      unsigned short op_type);
+
+#endif /* __CHCR_CRYPTO_H__ */
index 769148d..20f35df 100644 (file)
@@ -1260,8 +1260,8 @@ static struct crypto_alg qat_algs[] = { {
                        .setkey = qat_alg_ablkcipher_xts_setkey,
                        .decrypt = qat_alg_ablkcipher_decrypt,
                        .encrypt = qat_alg_ablkcipher_encrypt,
-                       .min_keysize = AES_MIN_KEY_SIZE,
-                       .max_keysize = AES_MAX_KEY_SIZE,
+                       .min_keysize = 2 * AES_MIN_KEY_SIZE,
+                       .max_keysize = 2 * AES_MAX_KEY_SIZE,
                        .ivsize = AES_BLOCK_SIZE,
                },
        },
index cfb2541..24353ec 100644 (file)
@@ -129,8 +129,8 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc,
 
                blkcipher_walk_init(&walk, dst, src, nbytes);
 
-               iv = (u8 *)walk.iv;
                ret = blkcipher_walk_virt(desc, &walk);
+               iv = walk.iv;
                memset(tweak, 0, AES_BLOCK_SIZE);
                aes_p8_encrypt(iv, tweak, &ctx->tweak_key);
 
index 803f395..29f600f 100644 (file)
@@ -459,7 +459,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
        }
 
        pgoff = linear_page_index(vma, pmd_addr);
-       phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+       phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
        if (phys == -1) {
                dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
                                pgoff);
index dfb1685..1f01e98 100644 (file)
@@ -116,6 +116,9 @@ static int dax_pmem_probe(struct device *dev)
        if (rc)
                return rc;
 
+       /* adjust the dax_region resource to the start of data */
+       res.start += le64_to_cpu(pfn_sb->dataoff);
+
        nd_region = to_nd_region(dev->parent);
        dax_region = alloc_dax_region(dev, nd_region->id, &res,
                        le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
index e434ffe..832cbd6 100644 (file)
@@ -2067,7 +2067,7 @@ err_dma_unregister:
 err_clk_disable:
        clk_disable_unprepare(atxdmac->clk);
 err_free_irq:
-       free_irq(atxdmac->irq, atxdmac->dma.dev);
+       free_irq(atxdmac->irq, atxdmac);
        return ret;
 }
 
@@ -2081,7 +2081,7 @@ static int at_xdmac_remove(struct platform_device *pdev)
        dma_async_device_unregister(&atxdmac->dma);
        clk_disable_unprepare(atxdmac->clk);
 
-       free_irq(atxdmac->irq, atxdmac->dma.dev);
+       free_irq(atxdmac->irq, atxdmac);
 
        for (i = 0; i < atxdmac->dma.chancnt; i++) {
                struct at_xdmac_chan *atchan = &atxdmac->chan[i];
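
Both at_xdmac hunks fix a mismatched free_irq() cookie: the dev_id handed to free_irq() must be the exact pointer that was passed to request_irq(), otherwise the registration is never found and torn down. A sketch of the paired calls, assuming the probe path registered with atxdmac as its cookie, which is what the fix implies:

/* probe */
ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);

/* error path and remove: same cookie, not atxdmac->dma.dev */
free_irq(atxdmac->irq, atxdmac);
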
index aad167e..de2a2a2 100644 (file)
@@ -836,6 +836,7 @@ static int fsl_re_probe(struct platform_device *ofdev)
                rc = of_property_read_u32(np, "reg", &off);
                if (rc) {
                        dev_err(dev, "Reg property not found in JQ node\n");
+                       of_node_put(np);
                        return -ENODEV;
                }
                /* Find out the Job Rings present under each JQ */
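
The fsl_re fix above applies the device-tree refcounting rule: node iterators take a reference on each node they hand out, so an early return from inside the loop must drop it with of_node_put(). A sketch of the pattern; the compatible string is a placeholder, not the driver's:

struct device_node *np;
u32 off;

for_each_compatible_node(np, NULL, "vendor,example-job-queue") {
        if (of_property_read_u32(np, "reg", &off)) {
                of_node_put(np);        /* balance the iterator's implicit get */
                return -ENODEV;
        }
}
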
index a4c53be..624f1e1 100644 (file)
@@ -861,7 +861,6 @@ static int mdc_dma_probe(struct platform_device *pdev)
 {
        struct mdc_dma *mdma;
        struct resource *res;
-       const struct of_device_id *match;
        unsigned int i;
        u32 val;
        int ret;
@@ -871,8 +870,7 @@ static int mdc_dma_probe(struct platform_device *pdev)
                return -ENOMEM;
        platform_set_drvdata(pdev, mdma);
 
-       match = of_match_device(mdc_dma_of_match, &pdev->dev);
-       mdma->soc = match->data;
+       mdma->soc = of_device_get_match_data(&pdev->dev);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        mdma->regs = devm_ioremap_resource(&pdev->dev, res);
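
The img-mdc change above swaps a two-step match-and-dereference for of_device_get_match_data(), which returns the matching entry's data directly and is simply NULL when nothing matched:

/* before: oopses if of_match_device() returns NULL */
match = of_match_device(mdc_dma_of_match, &pdev->dev);
mdma->soc = match->data;

/* after: one call, NULL-safe */
mdma->soc = of_device_get_match_data(&pdev->dev);
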
index dc7850a..3f56f9c 100644 (file)
@@ -638,7 +638,7 @@ static bool pxad_try_hotchain(struct virt_dma_chan *vc,
                vd_last_issued = list_entry(vc->desc_issued.prev,
                                            struct virt_dma_desc, node);
                pxad_desc_chain(vd_last_issued, vd);
-               if (is_chan_running(chan) || is_desc_completed(vd_last_issued))
+               if (is_chan_running(chan) || is_desc_completed(vd))
                        return true;
        }
 
@@ -671,6 +671,7 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
        struct virt_dma_desc *vd, *tmp;
        unsigned int dcsr;
        unsigned long flags;
+       bool vd_completed;
        dma_cookie_t last_started = 0;
 
        BUG_ON(!chan);
@@ -681,15 +682,17 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
 
        spin_lock_irqsave(&chan->vc.lock, flags);
        list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) {
+               vd_completed = is_desc_completed(vd);
                dev_dbg(&chan->vc.chan.dev->device,
-                       "%s(): checking txd %p[%x]: completed=%d\n",
-                       __func__, vd, vd->tx.cookie, is_desc_completed(vd));
+                       "%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n",
+                       __func__, vd, vd->tx.cookie, vd_completed,
+                       dcsr);
                last_started = vd->tx.cookie;
                if (to_pxad_sw_desc(vd)->cyclic) {
                        vchan_cyclic_callback(vd);
                        break;
                }
-               if (is_desc_completed(vd)) {
+               if (vd_completed) {
                        list_del(&vd->node);
                        vchan_cookie_complete(vd);
                } else {
index 749f1bd..06ecdc3 100644 (file)
@@ -600,27 +600,30 @@ static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
 {
        struct usb_dmac_chan *chan = dev;
        irqreturn_t ret = IRQ_NONE;
-       u32 mask = USB_DMACHCR_TE;
-       u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP;
+       u32 mask = 0;
        u32 chcr;
+       bool xfer_end = false;
 
        spin_lock(&chan->vc.lock);
 
        chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
-       if (chcr & check_bits)
-               mask |= USB_DMACHCR_DE | check_bits;
+       if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) {
+               mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP;
+               if (chcr & USB_DMACHCR_DE)
+                       xfer_end = true;
+               ret |= IRQ_HANDLED;
+       }
        if (chcr & USB_DMACHCR_NULL) {
                /* A TE interrupt will be raised after we set FTE */
                mask |= USB_DMACHCR_NULL;
                chcr |= USB_DMACHCR_FTE;
                ret |= IRQ_HANDLED;
        }
-       usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+       if (mask)
+               usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
 
-       if (chcr & check_bits) {
+       if (xfer_end)
                usb_dmac_isr_transfer_end(chan);
-               ret |= IRQ_HANDLED;
-       }
 
        spin_unlock(&chan->vc.lock);
 
index d0c1dab..dff1a4a 100644 (file)
@@ -251,6 +251,14 @@ config EDAC_SBRIDGE
          Support for error detection and correction on the Intel
          Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
+config EDAC_SKX
+       tristate "Intel Skylake server Integrated MC"
+       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+       depends on PCI_MMCONFIG
+       help
+         Support for error detection and correction on the Intel
+         Skylake server Integrated Memory Controllers.
+
 config EDAC_MPC85XX
        tristate "Freescale MPC83xx / MPC85xx"
        depends on EDAC_MM_EDAC && FSL_SOC
index f9e4a3e..9860499 100644 (file)
@@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400)              += i5400_edac.o
 obj-$(CONFIG_EDAC_I7300)               += i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)              += i7core_edac.o
 obj-$(CONFIG_EDAC_SBRIDGE)             += sb_edac.o
+obj-$(CONFIG_EDAC_SKX)                 += skx_edac.o
 obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
new file mode 100644 (file)
index 0000000..0ff4878
--- /dev/null
@@ -0,0 +1,1121 @@
+/*
+ * EDAC driver for Intel(R) Xeon(R) Skylake processors
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+#define SKX_REVISION    " Ver: 1.0 "
+
+/*
+ * Debug macros
+ */
+#define skx_printk(level, fmt, arg...)                 \
+       edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...)         \
+       edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+/*
+ * Get a bit field of register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi) \
+       (((v) & GENMASK_ULL((hi), (lo))) >> (lo))
+
+static LIST_HEAD(skx_edac_list);
+
+static u64 skx_tolm, skx_tohm;
+
+#define NUM_IMC                        2       /* memory controllers per socket */
+#define NUM_CHANNELS           3       /* channels per memory controller */
+#define NUM_DIMMS              2       /* Max DIMMS per channel */
+
+#define        MASK26  0x3FFFFFF               /* Mask of the low 26 bits (2^26 - 1) */
+#define MASK29 0x1FFFFFFF              /* Mask of the low 29 bits (2^29 - 1) */
+
+/*
+ * Each cpu socket contains some pci devices that provide global
+ * information, and also some that are local to each of the two
+ * memory controllers on the die.
+ */
+struct skx_dev {
+       struct list_head        list;
+       u8                      bus[4];
+       struct pci_dev  *sad_all;
+       struct pci_dev  *util_all;
+       u32     mcroute;
+       struct skx_imc {
+               struct mem_ctl_info *mci;
+               u8      mc;     /* system wide mc# */
+               u8      lmc;    /* socket relative mc# */
+               u8      src_id, node_id;
+               struct skx_channel {
+                       struct pci_dev *cdev;
+                       struct skx_dimm {
+                               u8      close_pg;
+                               u8      bank_xor_enable;
+                               u8      fine_grain_bank;
+                               u8      rowbits;
+                               u8      colbits;
+                       } dimms[NUM_DIMMS];
+               } chan[NUM_CHANNELS];
+       } imc[NUM_IMC];
+};
+static int skx_num_sockets;
+
+struct skx_pvt {
+       struct skx_imc  *imc;
+};
+
+struct decoded_addr {
+       struct skx_dev *dev;
+       u64     addr;
+       int     socket;
+       int     imc;
+       int     channel;
+       u64     chan_addr;
+       int     sktways;
+       int     chanways;
+       int     dimm;
+       int     rank;
+       int     channel_rank;
+       u64     rank_address;
+       int     row;
+       int     column;
+       int     bank_address;
+       int     bank_group;
+};
+
+static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
+{
+       struct skx_dev *d;
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               if (d->bus[idx] == bus)
+                       return d;
+       }
+
+       return NULL;
+}
+
+enum munittype {
+       CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+};
+
+struct munit {
+       u16     did;
+       u16     devfn[NUM_IMC];
+       u8      busidx;
+       u8      per_socket;
+       enum munittype mtype;
+};
+
+/*
+ * List of PCI device IDs that we need, together with the device and
+ * function numbers, to tell which memory controller each device
+ * belongs to.
+ */
+static const struct munit skx_all_munits[] = {
+       { 0x2054, { }, 1, 1, SAD_ALL },
+       { 0x2055, { }, 1, 1, UTIL_ALL },
+       { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
+       { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
+       { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+       { 0x208e, { }, 1, 0, SAD },
+       { }
+};
+
+/*
+ * We use the per-socket device 0x2016 to count how many sockets are present,
+ * and to determine which PCI buses are associated with each socket. Allocate
+ * and build the full list of all the skx_dev structures that we need here.
+ */
+static int get_all_bus_mappings(void)
+{
+       struct pci_dev *pdev, *prev;
+       struct skx_dev *d;
+       u32 reg;
+       int ndev = 0;
+
+       prev = NULL;
+       for (;;) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
+               if (!pdev)
+                       break;
+               ndev++;
+               d = kzalloc(sizeof(*d), GFP_KERNEL);
+               if (!d) {
+                       pci_dev_put(pdev);
+                       return -ENOMEM;
+               }
+               pci_read_config_dword(pdev, 0xCC, &reg);
+               d->bus[0] =  GET_BITFIELD(reg, 0, 7);
+               d->bus[1] =  GET_BITFIELD(reg, 8, 15);
+               d->bus[2] =  GET_BITFIELD(reg, 16, 23);
+               d->bus[3] =  GET_BITFIELD(reg, 24, 31);
+               edac_dbg(2, "busses: %x, %x, %x, %x\n",
+                        d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+               list_add_tail(&d->list, &skx_edac_list);
+               skx_num_sockets++;
+               prev = pdev;
+       }
+
+       return ndev;
+}
+
+static int get_all_munits(const struct munit *m)
+{
+       struct pci_dev *pdev, *prev;
+       struct skx_dev *d;
+       u32 reg;
+       int i = 0, ndev = 0;
+
+       prev = NULL;
+       for (;;) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
+               if (!pdev)
+                       break;
+               ndev++;
+               if (m->per_socket == NUM_IMC) {
+                       for (i = 0; i < NUM_IMC; i++)
+                               if (m->devfn[i] == pdev->devfn)
+                                       break;
+                       if (i == NUM_IMC)
+                               goto fail;
+               }
+               d = get_skx_dev(pdev->bus->number, m->busidx);
+               if (!d)
+                       goto fail;
+
+               /* Be sure that the device is enabled */
+               if (unlikely(pci_enable_device(pdev) < 0)) {
+                       skx_printk(KERN_ERR,
+                               "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
+                       goto fail;
+               }
+
+               switch (m->mtype) {
+               case CHAN0: case CHAN1: case CHAN2:
+                       pci_dev_get(pdev);
+                       d->imc[i].chan[m->mtype].cdev = pdev;
+                       break;
+               case SAD_ALL:
+                       pci_dev_get(pdev);
+                       d->sad_all = pdev;
+                       break;
+               case UTIL_ALL:
+                       pci_dev_get(pdev);
+                       d->util_all = pdev;
+                       break;
+               case SAD:
+                       /*
+                        * There is one of these devices per core, including
+                        * cores that don't exist on this SKU. Ignore any that
+                        * read a route table of zero, and make sure all the
+                        * non-zero values match.
+                        */
+                       pci_read_config_dword(pdev, 0xB4, &reg);
+                       if (reg != 0) {
+                               if (d->mcroute == 0)
+                                       d->mcroute = reg;
+                               else if (d->mcroute != reg) {
+                                       skx_printk(KERN_ERR,
+                                               "mcroute mismatch\n");
+                                       goto fail;
+                               }
+                       }
+                       ndev--;
+                       break;
+               }
+
+               prev = pdev;
+       }
+
+       return ndev;
+fail:
+       pci_dev_put(pdev);
+       return -ENODEV;
+}
+
+static const struct x86_cpu_id skx_cpuids[] = {
+       { X86_VENDOR_INTEL, 6, 0x55, 0, 0 },    /* Skylake */
+       { }
+};
+MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
+
+static u8 get_src_id(struct skx_dev *d)
+{
+       u32 reg;
+
+       pci_read_config_dword(d->util_all, 0xF0, &reg);
+
+       return GET_BITFIELD(reg, 12, 14);
+}
+
+static u8 skx_get_node_id(struct skx_dev *d)
+{
+       u32 reg;
+
+       pci_read_config_dword(d->util_all, 0xF4, &reg);
+
+       return GET_BITFIELD(reg, 0, 2);
+}
+
+static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
+                        int maxval, char *name)
+{
+       u32 val = GET_BITFIELD(reg, lobit, hibit);
+
+       if (val < minval || val > maxval) {
+               edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
+               return -EINVAL;
+       }
+       return val + add;
+}
+
+#define IS_DIMM_PRESENT(mtr)           GET_BITFIELD((mtr), 15, 15)
+
+#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks")
+#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
+#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
+
+static int get_width(u32 mtr)
+{
+       switch (GET_BITFIELD(mtr, 8, 9)) {
+       case 0:
+               return DEV_X4;
+       case 1:
+               return DEV_X8;
+       case 2:
+               return DEV_X16;
+       }
+       return DEV_UNKNOWN;
+}
+
+static int skx_get_hi_lo(void)
+{
+       struct pci_dev *pdev;
+       u32 reg;
+
+       pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
+       if (!pdev) {
+               edac_dbg(0, "Can't get tolm/tohm\n");
+               return -ENODEV;
+       }
+
+       pci_read_config_dword(pdev, 0xD0, &reg);
+       skx_tolm = reg;
+       pci_read_config_dword(pdev, 0xD4, &reg);
+       skx_tohm = reg;
+       pci_read_config_dword(pdev, 0xD8, &reg);
+       skx_tohm |= (u64)reg << 32;
+
+       pci_dev_put(pdev);
+       edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);
+
+       return 0;
+}
+
+static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+                        struct skx_imc *imc, int chan, int dimmno)
+{
+       int  banks = 16, ranks, rows, cols, npages;
+       u64 size;
+
+       if (!IS_DIMM_PRESENT(mtr))
+               return 0;
+       ranks = numrank(mtr);
+       rows = numrow(mtr);
+       cols = numcol(mtr);
+
+       /*
+        * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
+        */
+       size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
+       npages = MiB_TO_PAGES(size);
+
+       edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+                imc->mc, chan, dimmno, size, npages,
+                banks, ranks, rows, cols);
+
+       imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
+       imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+       imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
+       imc->chan[chan].dimms[dimmno].rowbits = rows;
+       imc->chan[chan].dimms[dimmno].colbits = cols;
+
+       dimm->nr_pages = npages;
+       dimm->grain = 32;
+       dimm->dtype = get_width(mtr);
+       dimm->mtype = MEM_DDR4;
+       dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+       snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+                imc->src_id, imc->lmc, chan, dimmno);
+
+       return 1;
+}
+
+#define SKX_GET_MTMTR(dev, reg) \
+       pci_read_config_dword((dev), 0x87c, &reg)
+
+static bool skx_check_ecc(struct pci_dev *pdev)
+{
+       u32 mtmtr;
+
+       SKX_GET_MTMTR(pdev, mtmtr);
+
+       return !!GET_BITFIELD(mtmtr, 2, 2);
+}
+
+static int skx_get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct skx_pvt *pvt = mci->pvt_info;
+       struct skx_imc *imc = pvt->imc;
+       struct dimm_info *dimm;
+       int i, j;
+       u32 mtr, amap;
+       int ndimms;
+
+       for (i = 0; i < NUM_CHANNELS; i++) {
+               ndimms = 0;
+               pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
+               for (j = 0; j < NUM_DIMMS; j++) {
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                            mci->n_layers, i, j, 0);
+                       pci_read_config_dword(imc->chan[i].cdev,
+                                       0x80 + 4*j, &mtr);
+                       ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
+               }
+               if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
+                       skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
+static void skx_unregister_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *mci = imc->mci;
+
+       if (!mci)
+               return;
+
+       edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
+
+       /* Remove MC sysfs nodes */
+       edac_mc_del_mc(mci->pdev);
+
+       edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+}
+
+static int skx_register_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *mci;
+       struct edac_mc_layer layers[2];
+       struct pci_dev *pdev = imc->chan[0].cdev;
+       struct skx_pvt *pvt;
+       int rc;
+
+       /* allocate a new MC control structure */
+       layers[0].type = EDAC_MC_LAYER_CHANNEL;
+       layers[0].size = NUM_CHANNELS;
+       layers[0].is_virt_csrow = false;
+       layers[1].type = EDAC_MC_LAYER_SLOT;
+       layers[1].size = NUM_DIMMS;
+       layers[1].is_virt_csrow = true;
+       mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
+                           sizeof(struct skx_pvt));
+
+       if (unlikely(!mci))
+               return -ENOMEM;
+
+       edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
+
+       /* Associate skx_dev and mci for future usage */
+       imc->mci = mci;
+       pvt = mci->pvt_info;
+       pvt->imc = imc;
+
+       mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
+                                 imc->node_id, imc->lmc);
+       mci->mtype_cap = MEM_FLAG_DDR4;
+       mci->edac_ctl_cap = EDAC_FLAG_NONE;
+       mci->edac_cap = EDAC_FLAG_NONE;
+       mci->mod_name = "skx_edac.c";
+       mci->dev_name = pci_name(imc->chan[0].cdev);
+       mci->mod_ver = SKX_REVISION;
+       mci->ctl_page_to_phys = NULL;
+
+       rc = skx_get_dimm_config(mci);
+       if (rc < 0)
+               goto fail;
+
+       /* record ptr to the generic device */
+       mci->pdev = &pdev->dev;
+
+       /* add this new MC control structure to EDAC's list of MCs */
+       if (unlikely(edac_mc_add_mc(mci))) {
+               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+               rc = -EINVAL;
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+       imc->mci = NULL;
+       return rc;
+}
+
+#define        SKX_MAX_SAD 24
+
+#define SKX_GET_SAD(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
+#define SKX_GET_ILV(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)
+
+#define        SKX_SAD_MOD3MODE(sad)   GET_BITFIELD((sad), 30, 31)
+#define        SKX_SAD_MOD3(sad)       GET_BITFIELD((sad), 27, 27)
+#define SKX_SAD_LIMIT(sad)     (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
+#define        SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
+#define        SKX_SAD_ATTR(sad)       GET_BITFIELD((sad), 3, 4)
+#define        SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
+#define SKX_SAD_ENABLE(sad)    GET_BITFIELD((sad), 0, 0)
+
+#define SKX_ILV_REMOTE(tgt)    (((tgt) & 8) == 0)
+#define SKX_ILV_TARGET(tgt)    ((tgt) & 7)
+
+static bool skx_sad_decode(struct decoded_addr *res)
+{
+       struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
+       u64 addr = res->addr;
+       int i, idx, tgt, lchan, shift;
+       u32 sad, ilv;
+       u64 limit, prev_limit;
+       int remote = 0;
+
+       /* Simple sanity check for I/O space or out of range */
+       if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
+               edac_dbg(0, "Address %llx out of range\n", addr);
+               return false;
+       }
+
+restart:
+       prev_limit = 0;
+       for (i = 0; i < SKX_MAX_SAD; i++) {
+               SKX_GET_SAD(d, i, sad);
+               limit = SKX_SAD_LIMIT(sad);
+               if (SKX_SAD_ENABLE(sad)) {
+                       if (addr >= prev_limit && addr <= limit)
+                               goto sad_found;
+               }
+               prev_limit = limit + 1;
+       }
+       edac_dbg(0, "No SAD entry for %llx\n", addr);
+       return false;
+
+sad_found:
+       SKX_GET_ILV(d, i, ilv);
+
+       switch (SKX_SAD_INTERLEAVE(sad)) {
+       case 0:
+               idx = GET_BITFIELD(addr, 6, 8);
+               break;
+       case 1:
+               idx = GET_BITFIELD(addr, 8, 10);
+               break;
+       case 2:
+               idx = GET_BITFIELD(addr, 12, 14);
+               break;
+       case 3:
+               idx = GET_BITFIELD(addr, 30, 32);
+               break;
+       }
+
+       tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
+
+       /* If it points to another node, find it and start over */
+       if (SKX_ILV_REMOTE(tgt)) {
+               if (remote) {
+                       edac_dbg(0, "Double remote!\n");
+                       return false;
+               }
+               remote = 1;
+               list_for_each_entry(d, &skx_edac_list, list) {
+                       if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
+                               goto restart;
+               }
+               edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
+               return false;
+       }
+
+       if (SKX_SAD_MOD3(sad) == 0)
+               lchan = SKX_ILV_TARGET(tgt);
+       else {
+               switch (SKX_SAD_MOD3MODE(sad)) {
+               case 0:
+                       shift = 6;
+                       break;
+               case 1:
+                       shift = 8;
+                       break;
+               case 2:
+                       shift = 12;
+                       break;
+               default:
+                       edac_dbg(0, "illegal mod3mode\n");
+                       return false;
+               }
+               switch (SKX_SAD_MOD3ASMOD2(sad)) {
+               case 0:
+                       lchan = (addr >> shift) % 3;
+                       break;
+               case 1:
+                       lchan = (addr >> shift) % 2;
+                       break;
+               case 2:
+                       lchan = (addr >> shift) % 2;
+                       lchan = (lchan << 1) | ~lchan;
+                       break;
+               case 3:
+                       lchan = ((addr >> shift) % 2) << 1;
+                       break;
+               }
+               lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
+       }
+
+       res->dev = d;
+       res->socket = d->imc[0].src_id;
+       res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
+       res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
+
+       edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
+                res->addr, res->socket, res->imc, res->channel);
+       return true;
+}
+
+#define        SKX_MAX_TAD 8
+
+#define SKX_GET_TADBASE(d, mc, i, reg)                 \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
+#define SKX_GET_TADWAYNESS(d, mc, i, reg)              \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
+#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)     \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)
+
+#define        SKX_TAD_BASE(b)         ((u64)GET_BITFIELD((b), 12, 31) << 26)
+#define SKX_TAD_SKT_GRAN(b)    GET_BITFIELD((b), 4, 5)
+#define SKX_TAD_CHN_GRAN(b)    GET_BITFIELD((b), 6, 7)
+#define        SKX_TAD_LIMIT(b)        (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
+#define        SKX_TAD_OFFSET(b)       ((u64)GET_BITFIELD((b), 4, 23) << 26)
+#define        SKX_TAD_SKTWAYS(b)      (1 << GET_BITFIELD((b), 10, 11))
+#define        SKX_TAD_CHNWAYS(b)      (GET_BITFIELD((b), 8, 9) + 1)
+
+/* Which bit is used for both socket and channel interleave */
+static int skx_granularity[] = { 6, 8, 12, 30 };
+
+static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
+{
+       addr >>= shift;
+       addr /= ways;
+       addr <<= shift;
+
+       return addr | (lowbits & ((1ull << shift) - 1));
+}
+
+static bool skx_tad_decode(struct decoded_addr *res)
+{
+       int i;
+       u32 base, wayness, chnilvoffset;
+       int skt_interleave_bit, chn_interleave_bit;
+       u64 channel_addr;
+
+       for (i = 0; i < SKX_MAX_TAD; i++) {
+               SKX_GET_TADBASE(res->dev, res->imc, i, base);
+               SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
+               if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
+                       goto tad_found;
+       }
+       edac_dbg(0, "No TAD entry for %llx\n", res->addr);
+       return false;
+
+tad_found:
+       res->sktways = SKX_TAD_SKTWAYS(wayness);
+       res->chanways = SKX_TAD_CHNWAYS(wayness);
+       skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
+       chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
+
+       SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
+       channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
+
+       if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
+               /* Must handle channel first, then socket */
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, channel_addr);
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, channel_addr);
+       } else {
+               /* Handle socket then channel. Preserve low bits from original address */
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, res->addr);
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, res->addr);
+       }
+
+       res->chan_addr = channel_addr;
+
+       edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
+                res->addr, res->chan_addr, res->sktways, res->chanways);
+       return true;
+}
+
+#define SKX_MAX_RIR 4
+
+#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)          \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x108 + 4 * (i), &reg)
+#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)         \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x120 + 16 * idx + 4 * (i), &reg)
+
+#define        SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
+#define        SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
+#define        SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
+#define        SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
+#define        SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
+
+static bool skx_rir_decode(struct decoded_addr *res)
+{
+       int i, idx, chan_rank;
+       int shift;
+       u32 rirway, rirlv;
+       u64 rank_addr, prev_limit = 0, limit;
+
+       if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
+               shift = 6;
+       else
+               shift = 13;
+
+       for (i = 0; i < SKX_MAX_RIR; i++) {
+               SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
+               limit = SKX_RIR_LIMIT(rirway);
+               if (SKX_RIR_VALID(rirway)) {
+                       if (prev_limit <= res->chan_addr &&
+                           res->chan_addr <= limit)
+                               goto rir_found;
+               }
+               prev_limit = limit;
+       }
+       edac_dbg(0, "No RIR entry for %llx\n", res->addr);
+       return false;
+
+rir_found:
+       rank_addr = res->chan_addr >> shift;
+       rank_addr /= SKX_RIR_WAYS(rirway);
+       rank_addr <<= shift;
+       rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
+
+       res->rank_address = rank_addr;
+       idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
+
+       SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
+       res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
+       chan_rank = SKX_RIR_CHAN_RANK(rirlv);
+       res->channel_rank = chan_rank;
+       res->dimm = chan_rank / 4;
+       res->rank = chan_rank % 4;
+
+       edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
+                res->addr, res->dimm, res->rank,
+                res->channel_rank, res->rank_address);
+       return true;
+}
+
+static u8 skx_close_row[] = {
+       15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
+};
+static u8 skx_close_column[] = {
+       3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+static u8 skx_open_row[] = {
+       14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
+};
+static u8 skx_open_column[] = {
+       3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+static u8 skx_open_fine_column[] = {
+       3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+static int skx_bits(u64 addr, int nbits, u8 *bits)
+{
+       int i, res = 0;
+
+       for (i = 0; i < nbits; i++)
+               res |= ((addr >> bits[i]) & 1) << i;
+       return res;
+}
+
+static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+       int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
+
+       if (do_xor)
+               ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);
+
+       return ret;
+}
+
+static bool skx_mad_decode(struct decoded_addr *r)
+{
+       struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
+       int bg0 = dimm->fine_grain_bank ? 6 : 13;
+
+       if (dimm->close_pg) {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
+               r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
+               r->column |= 0x400; /* C10 is autoprecharge, always set */
+               r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
+               r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
+       } else {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
+               if (dimm->fine_grain_bank)
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
+               else
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
+               r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
+               r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
+       }
+       r->row &= (1u << dimm->rowbits) - 1;
+
+       edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
+                r->addr, r->row, r->column, r->bank_address,
+                r->bank_group);
+       return true;
+}
+
+static bool skx_decode(struct decoded_addr *res)
+{
+
+       return skx_sad_decode(res) && skx_tad_decode(res) &&
+               skx_rir_decode(res) && skx_mad_decode(res);
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static struct dentry *skx_test;
+static u64 skx_fake_addr;
+
+static int debugfs_u64_set(void *data, u64 val)
+{
+       struct decoded_addr res;
+
+       res.addr = val;
+       skx_decode(&res);
+
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static struct dentry *mydebugfs_create(const char *name, umode_t mode,
+                                      struct dentry *parent, u64 *value)
+{
+       return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
+}
+
+static void setup_skx_debug(void)
+{
+       skx_test = debugfs_create_dir("skx_edac_test", NULL);
+       mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
+}
+
+static void teardown_skx_debug(void)
+{
+       debugfs_remove_recursive(skx_test);
+}
+#else
+static void setup_skx_debug(void)
+{
+}
+
+static void teardown_skx_debug(void)
+{
+}
+#endif /*CONFIG_EDAC_DEBUG*/
+
+static void skx_mce_output_error(struct mem_ctl_info *mci,
+                                const struct mce *m,
+                                struct decoded_addr *res)
+{
+       enum hw_event_mc_err_type tp_event;
+       char *type, *optype, msg[256];
+       bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+       bool overflow = GET_BITFIELD(m->status, 62, 62);
+       bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+       bool recoverable;
+       u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+       u32 mscod = GET_BITFIELD(m->status, 16, 31);
+       u32 errcode = GET_BITFIELD(m->status, 0, 15);
+       u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+
+       recoverable = GET_BITFIELD(m->status, 56, 56);
+
+       if (uncorrected_error) {
+               if (ripv) {
+                       type = "FATAL";
+                       tp_event = HW_EVENT_ERR_FATAL;
+               } else {
+                       type = "NON_FATAL";
+                       tp_event = HW_EVENT_ERR_UNCORRECTED;
+               }
+       } else {
+               type = "CORRECTED";
+               tp_event = HW_EVENT_ERR_CORRECTED;
+       }
+
+       /*
+        * According to Table 15-9 of the Intel Architecture spec vol 3A,
+        * memory errors should fit in this mask:
+        *      000f 0000 1mmm cccc (binary)
+        * where:
+        *      f = Correction Report Filtering Bit. If 1, subsequent errors
+        *          won't be shown
+        *      mmm = error type
+        *      cccc = channel
+        * If the mask doesn't match, report an error to the parsing logic
+        */
+       if (!((errcode & 0xef80) == 0x80)) {
+               optype = "Can't parse: it is not a memory error";
+       } else {
+               switch (optypenum) {
+               case 0:
+                       optype = "generic undef request error";
+                       break;
+               case 1:
+                       optype = "memory read error";
+                       break;
+               case 2:
+                       optype = "memory write error";
+                       break;
+               case 3:
+                       optype = "addr/cmd error";
+                       break;
+               case 4:
+                       optype = "memory scrubbing error";
+                       break;
+               default:
+                       optype = "reserved";
+                       break;
+               }
+       }
+
+       snprintf(msg, sizeof(msg),
+                "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+                overflow ? " OVERFLOW" : "",
+                (uncorrected_error && recoverable) ? " recoverable" : "",
+                mscod, errcode,
+                res->socket, res->imc, res->rank,
+                res->bank_group, res->bank_address, res->row, res->column);
+
+       edac_dbg(0, "%s\n", msg);
+
+       /* Call the helper to output message */
+       edac_mc_handle_error(tp_event, mci, core_err_cnt,
+                            m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+                            res->channel, res->dimm, -1,
+                            optype, msg);
+}
+
+static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+                              void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct decoded_addr res;
+       struct mem_ctl_info *mci;
+       char *type;
+
+       if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+               return NOTIFY_DONE;
+
+       /* Ignore unless this is memory-related and has a valid address */
+       if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+               return NOTIFY_DONE;
+
+       res.addr = mce->addr;
+       if (!skx_decode(&res))
+               return NOTIFY_DONE;
+       mci = res.dev->imc[res.imc].mci;
+
+       if (mce->mcgstatus & MCG_STATUS_MCIP)
+               type = "Exception";
+       else
+               type = "Event";
+
+       skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+       skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
+                         "Bank %d: %016Lx\n", mce->extcpu, type,
+                         mce->mcgstatus, mce->bank, mce->status);
+       skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
+       skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
+       skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
+
+       skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
+                         "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
+                         mce->time, mce->socketid, mce->apicid);
+
+       skx_mce_output_error(mci, mce, &res);
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block skx_mce_dec = {
+       .notifier_call = skx_mce_check_error,
+};
+
+static void skx_remove(void)
+{
+       int i, j;
+       struct skx_dev *d, *tmp;
+
+       edac_dbg(0, "\n");
+
+       list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
+               list_del(&d->list);
+               for (i = 0; i < NUM_IMC; i++) {
+                       skx_unregister_mci(&d->imc[i]);
+                       for (j = 0; j < NUM_CHANNELS; j++)
+                               pci_dev_put(d->imc[i].chan[j].cdev);
+               }
+               pci_dev_put(d->util_all);
+               pci_dev_put(d->sad_all);
+
+               kfree(d);
+       }
+}
+
+/*
+ * skx_init:
+ *     make sure we are running on the correct CPU model,
+ *     search for all the devices we need,
+ *     check which DIMMs are present.
+ */
+int __init skx_init(void)
+{
+       const struct x86_cpu_id *id;
+       const struct munit *m;
+       int rc = 0, i;
+       u8 mc = 0, src_id, node_id;
+       struct skx_dev *d;
+
+       edac_dbg(2, "\n");
+
+       id = x86_match_cpu(skx_cpuids);
+       if (!id)
+               return -ENODEV;
+
+       rc = skx_get_hi_lo();
+       if (rc)
+               return rc;
+
+       rc = get_all_bus_mappings();
+       if (rc < 0)
+               goto fail;
+       if (rc == 0) {
+               edac_dbg(2, "No memory controllers found\n");
+               return -ENODEV;
+       }
+
+       for (m = skx_all_munits; m->did; m++) {
+               rc = get_all_munits(m);
+               if (rc < 0)
+                       goto fail;
+               if (rc != m->per_socket * skx_num_sockets) {
+                       edac_dbg(2, "Expected %d, got %d of %x\n",
+                                m->per_socket * skx_num_sockets, rc, m->did);
+                       rc = -ENODEV;
+                       goto fail;
+               }
+       }
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               src_id = get_src_id(d);
+               node_id = skx_get_node_id(d);
+               edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+               for (i = 0; i < NUM_IMC; i++) {
+                       d->imc[i].mc = mc++;
+                       d->imc[i].lmc = i;
+                       d->imc[i].src_id = src_id;
+                       d->imc[i].node_id = node_id;
+                       rc = skx_register_mci(&d->imc[i]);
+                       if (rc < 0)
+                               goto fail;
+               }
+       }
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       setup_skx_debug();
+
+       mce_register_decode_chain(&skx_mce_dec);
+
+       return 0;
+fail:
+       skx_remove();
+       return rc;
+}
+
+static void __exit skx_exit(void)
+{
+       edac_dbg(2, "\n");
+       mce_unregister_decode_chain(&skx_mce_dec);
+       skx_remove();
+       teardown_skx_debug();
+}
+
+module_init(skx_init);
+module_exit(skx_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
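
The heart of the decode chain above is the interleave reversal in skx_do_interleave(): divide the address above the granularity bit by the number of ways, then splice the original low bits back in. A standalone sketch of the same arithmetic, illustrative only and not driver code:

#include <stdio.h>
#include <stdint.h>

static uint64_t undo_interleave(uint64_t addr, int shift, int ways,
                                uint64_t lowbits)
{
        uint64_t high = (addr >> shift) / ways; /* strip the ways factor */

        return (high << shift) | (lowbits & ((1ull << shift) - 1));
}

int main(void)
{
        /* 3-way interleave at bit 8: 0x12345 decodes to 0x6145 */
        printf("%#llx\n",
               (unsigned long long)undo_interleave(0x12345, 8, 3, 0x12345));
        return 0;
}

With CONFIG_EDAC_DEBUG enabled, the whole chain can be exercised from userspace by writing a physical address to /sys/kernel/debug/skx_edac_test/addr, as the debugfs hook above sets up.
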
index 4388937..ce2bc2a 100644 (file)
@@ -709,9 +709,10 @@ static int scpi_probe(struct platform_device *pdev)
                struct mbox_client *cl = &pchan->cl;
                struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
 
-               if (of_address_to_resource(shmem, 0, &res)) {
+               ret = of_address_to_resource(shmem, 0, &res);
+               of_node_put(shmem);
+               if (ret) {
                        dev_err(dev, "failed to get SCPI payload mem resource\n");
-                       ret = -EINVAL;
                        goto err;
                }
 
index 94a58a0..44c0139 100644 (file)
@@ -229,14 +229,14 @@ static int __init dmi_id_init(void)
 
        ret = device_register(dmi_dev);
        if (ret)
-               goto fail_free_dmi_dev;
+               goto fail_put_dmi_dev;
 
        return 0;
 
-fail_free_dmi_dev:
-       kfree(dmi_dev);
-fail_class_unregister:
+fail_put_dmi_dev:
+       put_device(dmi_dev);
 
+fail_class_unregister:
        class_unregister(&dmi_class);
 
        return ret;
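
The dmi-id fix above applies a driver-core rule: once device_register() has been called, even unsuccessfully, the struct device is refcounted and must be released through put_device() so that its release() callback does the freeing; a bare kfree() would bypass the refcount and can free memory another holder still sees:

ret = device_register(dmi_dev);
if (ret) {
        put_device(dmi_dev);    /* drop the last ref; release() frees it */
        return ret;
}
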
index 98dd47a..24caedb 100644 (file)
@@ -50,6 +50,7 @@ config GPIO_DEVRES
 config OF_GPIO
        def_bool y
        depends on OF
+       depends on HAS_IOMEM
 
 config GPIO_ACPI
        def_bool y
@@ -188,7 +189,7 @@ config GPIO_EP93XX
 config GPIO_ETRAXFS
        bool "Axis ETRAX FS General I/O"
        depends on CRIS || COMPILE_TEST
-       depends on OF
+       depends on OF_GPIO
        select GPIO_GENERIC
        select GPIOLIB_IRQCHIP
        help
@@ -214,7 +215,7 @@ config GPIO_GENERIC_PLATFORM
 
 config GPIO_GRGPIO
        tristate "Aeroflex Gaisler GRGPIO support"
-       depends on OF
+       depends on OF_GPIO
        select GPIO_GENERIC
        select IRQ_DOMAIN
        help
@@ -312,7 +313,7 @@ config GPIO_MPC8XXX
 config GPIO_MVEBU
        def_bool y
        depends on PLAT_ORION
-       depends on OF
+       depends on OF_GPIO
        select GENERIC_IRQ_CHIP
 
 config GPIO_MXC
@@ -405,7 +406,7 @@ config GPIO_TEGRA
        bool "NVIDIA Tegra GPIO support"
        default ARCH_TEGRA
        depends on ARCH_TEGRA || COMPILE_TEST
-       depends on OF
+       depends on OF_GPIO
        help
          Say yes here to support GPIO pins on NVIDIA Tegra SoCs.
 
@@ -1099,7 +1100,7 @@ menu "SPI GPIO expanders"
 
 config GPIO_74X164
        tristate "74x164 serial-in/parallel-out 8-bits shift register"
-       depends on OF
+       depends on OF_GPIO
        help
          Driver for 74x164 compatible serial-in/parallel-out 8-outputs
          shift registers. This driver can be used to provide access
@@ -1130,6 +1131,7 @@ menu "SPI or I2C GPIO expanders"
 
 config GPIO_MCP23S08
        tristate "Microchip MCP23xxx I/O expander"
+       depends on OF_GPIO
        select GPIOLIB_IRQCHIP
        help
          SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017
index 0880736..946d091 100644 (file)
@@ -192,6 +192,10 @@ int __max730x_probe(struct max7301 *ts)
        ts->chip.parent = dev;
        ts->chip.owner = THIS_MODULE;
 
+       ret = gpiochip_add_data(&ts->chip, ts);
+       if (ret)
+               goto exit_destroy;
+
        /*
         * initialize pullups according to platform data and cache the
         * register values for later use.
@@ -213,10 +217,6 @@ int __max730x_probe(struct max7301 *ts)
                }
        }
 
-       ret = gpiochip_add_data(&ts->chip, ts);
-       if (ret)
-               goto exit_destroy;
-
        return ret;
 
 exit_destroy:
index ac22efc..99d37b5 100644 (file)
@@ -564,7 +564,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev,
        mcp->chip.direction_output = mcp23s08_direction_output;
        mcp->chip.set = mcp23s08_set;
        mcp->chip.dbg_show = mcp23s08_dbg_show;
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_GPIO
        mcp->chip.of_gpio_n_cells = 2;
        mcp->chip.of_node = dev->of_node;
 #endif
index 0c99e8f..8d8ee0e 100644 (file)
@@ -155,7 +155,7 @@ static int sa1100_gpio_irqdomain_map(struct irq_domain *d,
 {
        irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip,
                                 handle_edge_irq);
-       irq_set_noprobe(irq);
+       irq_set_probe(irq);
 
        return 0;
 }
index 75e7b39..a28feb3 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
-#include <linux/io-mapping.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
index 8ebc5f1..700c56b 100644 (file)
@@ -426,6 +426,8 @@ struct amdgpu_mman {
 
        /* custom LRU management */
        struct amdgpu_mman_lru                  log2_size[AMDGPU_TTM_LRU_SIZE];
+       /* guard for log2_size array, don't add anything in between */
+       struct amdgpu_mman_lru                  guard;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -646,9 +648,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
                     dma_addr_t *dma_addr, uint32_t flags);
 
index 9831753..fe872b8 100644 (file)
@@ -321,6 +321,19 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
                            (le16_to_cpu(path->usConnObjectId) &
                             OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
 
+                       /* Skip TV/CV support */
+                       if ((le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_TV1_SUPPORT) ||
+                           (le16_to_cpu(path->usDeviceTag) ==
+                            ATOM_DEVICE_CV_SUPPORT))
+                               continue;
+
+                       if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+                               DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+                                         con_obj_id, le16_to_cpu(path->usDeviceTag));
+                               continue;
+                       }
+
                        connector_type =
                                object_connector_convert[con_obj_id];
                        connector_object_id = con_obj_id;
index 49de926..10b5ddf 100644 (file)
@@ -200,16 +200,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                atpx->functions.power_cntl = false;
-#endif
                atpx->is_hybrid = true;
        }
 
index 921bce2..0feea34 100644 (file)
@@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
  * Unbinds the requested pages from the gart page table and
  * replaces them with the dummy page (all asics).
  */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages)
 {
        unsigned t;
@@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
  * (all asics).
  * Returns 0 for success, -EINVAL for failure.
  */
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist, dma_addr_t *dma_addr,
                     uint32_t flags)
 {
index a31d7ef..ec1282a 100644 (file)
@@ -280,7 +280,7 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
        unsigned i;
-       int r;
+       int r, ret = 0;
 
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];
@@ -301,10 +301,11 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
                        } else {
                                /* still not good, but we can live with it */
                                DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
+                               ret = r;
                        }
                }
        }
-       return 0;
+       return ret;
 }
 
 /*
index 9b61c8b..716f2af 100644 (file)
@@ -251,8 +251,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 
        adev = amdgpu_get_adev(bo->bdev);
        ring = adev->mman.buffer_funcs_ring;
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
 
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
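
The amdgpu_move_blit hunk above is a shift-widening fix: mem->start is an unsigned long page number, so on 32-bit builds shifting it by PAGE_SHIFT overflows for buffers at or above 4 GiB unless the value is widened first:

u64 bad  = old_mem->start << PAGE_SHIFT;        /* 32-bit shift, truncates */
u64 good = (u64)old_mem->start << PAGE_SHIFT;   /* widen first, then shift */
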
@@ -950,6 +950,8 @@ static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo)
        struct list_head *res = lru->lru[tbo->mem.mem_type];
 
        lru->lru[tbo->mem.mem_type] = &tbo->lru;
+       while ((++lru)->lru[tbo->mem.mem_type] == res)
+               lru->lru[tbo->mem.mem_type] = &tbo->lru;
 
        return res;
 }
@@ -960,6 +962,8 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo)
        struct list_head *res = lru->swap_lru;
 
        lru->swap_lru = &tbo->swap;
+       while ((++lru)->swap_lru == res)
+               lru->swap_lru = &tbo->swap;
 
        return res;
 }
@@ -1011,6 +1015,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
        }
 
+       for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
+               adev->mman.guard.lru[j] = NULL;
+       adev->mman.guard.swap_lru = NULL;
+
        adev->mman.initialized = true;
        r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
                                adev->mc.real_vram_size >> PAGE_SHIFT);
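
The guard entry added to amdgpu_mman above gives the ++lru walks in amdgpu_ttm_lru_tail() and its swap twin a NULL-filled sentinel to stop on, so runs of slots sharing the same tail pointer can be retargeted without an explicit index bound. A standalone sketch of the pattern with illustrative names:

struct lru_slot { void *tail; };

/* slots[] must end in a sentinel whose tail can never equal old */
static void retarget_tails(struct lru_slot *slot, void *old, void *new_tail)
{
        slot->tail = new_tail;
        while ((++slot)->tail == old)   /* the NULL guard ends the walk */
                slot->tail = new_tail;
}
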
index b11f4e8..4aa993d 100644 (file)
@@ -1187,7 +1187,8 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                r = 0;
        }
 
-error:
        fence_put(fence);
+
+error:
        return r;
 }
index 8e642fc..80120fa 100644 (file)
@@ -1535,7 +1535,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
        if (r)
-               return r;
+               goto err;
 
        vm->page_directory_fence = NULL;
 
@@ -1565,6 +1565,9 @@ error_free_page_directory:
 error_free_sched_entity:
        amd_sched_entity_fini(&ring->sched, &vm->entity);
 
+err:
+       drm_free_large(vm->page_tables);
+
        return r;
 }
 
index ee64669..77fdd99 100644 (file)
@@ -52,6 +52,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
 static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
 static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static int cik_sdma_soft_reset(void *handle);
 
 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
 MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
@@ -1037,6 +1038,8 @@ static int cik_sdma_resume(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       cik_sdma_soft_reset(handle);
+
        return cik_sdma_hw_init(adev);
 }
 
index d869d05..425413f 100644 (file)
@@ -2755,8 +2755,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;
-
-       gfx_v7_0_cp_compute_enable(adev, true);
+       struct amdgpu_ring *ring;
 
        /* fix up chicken bits */
        tmp = RREG32(mmCP_CPF_DEBUG);
@@ -2791,7 +2790,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 
        /* init the queues.  Just two for now. */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-               struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+               ring = &adev->gfx.compute_ring[i];
 
                if (ring->mqd_obj == NULL) {
                        r = amdgpu_bo_create(adev,
@@ -2970,6 +2969,13 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
                amdgpu_bo_unreserve(ring->mqd_obj);
 
                ring->ready = true;
+       }
+
+       gfx_v7_0_cp_compute_enable(adev, true);
+
+       for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+               ring = &adev->gfx.compute_ring[i];
+
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
index 1351c7e..a64715d 100644 (file)
@@ -714,7 +714,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                DRM_ERROR("amdgpu: IB test timed out\n");
                r = -ETIMEDOUT;
                goto err1;
-       } else if (r) {
+       } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err1;
        }
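
The `r < 0` change matters because fence_wait_timeout()-style waits return a positive remaining-time value on success, zero on timeout, and a negative errno on error, so the old `else if (r)` misreported success as failure. A userspace illustration of that tri-state contract (fake_wait() is a stand-in):

        #include <errno.h>
        #include <stdio.h>

        /* fence_wait_timeout()-style contract: <0 error, 0 timed out,
         * >0 jiffies remaining on success. */
        static long fake_wait(int scenario)
        {
                switch (scenario) {
                case 0:
                        return 10;              /* signaled with time to spare */
                case 1:
                        return 0;               /* timed out */
                default:
                        return -EINTR;          /* interrupted */
                }
        }

        int main(void)
        {
                for (int s = 0; s < 3; s++) {
                        long r = fake_wait(s);

                        if (r == 0)
                                puts("timeout");
                        else if (r < 0)         /* not "else if (r)": r > 0 means success */
                                printf("error %ld\n", r);
                        else
                                puts("ok");
                }
                return 0;
        }
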
index e621eba..a7d3cb3 100644 (file)
@@ -184,7 +184,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
                                                        sizeof(u32)) + inx;
 
        pr_debug("kfd: get kernel queue doorbell\n"
-                        "     doorbell offset   == 0x%08d\n"
+                        "     doorbell offset   == 0x%08X\n"
                         "     kernel address    == 0x%08lX\n",
                *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
 
index ef312bb..963a24d 100644 (file)
@@ -405,7 +405,7 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
        spin_lock(&sched->job_list_lock);
        s_job = list_first_entry_or_null(&sched->ring_mirror_list,
                                         struct amd_sched_job, node);
-       if (s_job)
+       if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
                schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
index fa39307..2a3ded4 100644 (file)
@@ -475,7 +475,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->ctm_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -483,7 +483,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        sizeof(struct drm_color_ctm),
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (property == config->gamma_lut_property) {
                ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -491,7 +491,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
                                        val,
                                        -1,
                                        &replaced);
-               state->color_mgmt_changed = replaced;
+               state->color_mgmt_changed |= replaced;
                return ret;
        } else if (crtc->funcs->atomic_set_property)
                return crtc->funcs->atomic_set_property(crtc, state, property, val);
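
Switching to `|=` lets each property update accumulate into color_mgmt_changed; with plain `=`, replacing the CTM after the degamma LUT would clobber the earlier `replaced = true`. In miniature, with plain booleans standing in for the DRM state:

        #include <stdbool.h>
        #include <stdio.h>

        int main(void)
        {
                bool color_mgmt_changed = false;
                bool replaced;

                replaced = true;                 /* degamma LUT blob replaced */
                color_mgmt_changed |= replaced;

                replaced = false;                /* CTM untouched this time */
                color_mgmt_changed |= replaced;  /* '=' here would lose the first update */

                printf("changed: %d\n", color_mgmt_changed);  /* prints 1 */
                return 0;
        }
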
index b1dbb60..ddebe54 100644 (file)
@@ -5404,6 +5404,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
        struct drm_pending_vblank_event *e = NULL;
        int ret = -EINVAL;
 
+       if (!drm_core_check_feature(dev, DRIVER_MODESET))
+               return -EINVAL;
+
        if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
            page_flip->reserved != 0)
                return -EINVAL;
index ce54e98..0a06f91 100644 (file)
@@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper)
 
        /* Sometimes user space wants everything disabled, so don't steal the
         * display if there's a master. */
-       if (lockless_dereference(dev->master))
+       if (READ_ONCE(dev->master))
                return false;
 
        drm_for_each_crtc(crtc, dev) {
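
lockless_dereference() implies dependent-load ordering for a pointer that will be dereferenced; dev->master is only tested against NULL here, so a single non-torn load is all that is needed. A userspace approximation (the macro below mimics the kernel's READ_ONCE(), it is not the real definition):

        #include <stddef.h>
        #include <stdio.h>

        /* Userspace stand-in for the kernel's READ_ONCE(): one non-torn
         * load the compiler may neither cache nor repeat. */
        #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

        struct dev {
                void *master;
        };

        static int has_master(struct dev *d)
        {
                /* The pointer is only tested for NULL-ness, never
                 * dereferenced, so no dependent-load ordering is needed. */
                if (READ_ONCE(d->master))
                        return 1;
                return 0;
        }

        int main(void)
        {
                struct dev d = { .master = NULL };

                printf("has master: %d\n", has_master(&d));
                return 0;
        }
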
index 87ef341..b382cf5 100644 (file)
@@ -1333,8 +1333,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        if (ret < 0)
                return ret;
 
-       mutex_lock(&gpu->lock);
-
        /*
         * TODO
         *
@@ -1348,16 +1346,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        if (unlikely(event == ~0U)) {
                DRM_ERROR("no free event\n");
                ret = -EBUSY;
-               goto out_unlock;
+               goto out_pm_put;
        }
 
        fence = etnaviv_gpu_fence_alloc(gpu);
        if (!fence) {
                event_free(gpu, event);
                ret = -ENOMEM;
-               goto out_unlock;
+               goto out_pm_put;
        }
 
+       mutex_lock(&gpu->lock);
+
        gpu->event[event].fence = fence;
        submit->fence = fence->seqno;
        gpu->active_fence = submit->fence;
@@ -1395,9 +1395,9 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
        hangcheck_timer_reset(gpu);
        ret = 0;
 
-out_unlock:
        mutex_unlock(&gpu->lock);
 
+out_pm_put:
        etnaviv_gpu_pm_put(gpu);
 
        return ret;
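
Taking gpu->lock only after the event and fence allocations means every failure before it unwinds without owning the mutex, which is why the single out_unlock label splits into a locked exit and an unlocked out_pm_put exit. The narrowed-scope shape, with pthread stand-ins for the GPU lock:

        #include <errno.h>
        #include <pthread.h>
        #include <stdlib.h>

        static pthread_mutex_t gpu_lock = PTHREAD_MUTEX_INITIALIZER;

        static int submit(void)
        {
                int ret;
                void *fence = malloc(16);       /* allocation that can fail... */

                if (!fence) {
                        ret = -ENOMEM;
                        goto out_pm_put;        /* ...unwinds without the lock held */
                }

                pthread_mutex_lock(&gpu_lock);  /* lock only around shared state */
                /* ... record the fence, kick the hardware ... */
                pthread_mutex_unlock(&gpu_lock);

                free(fence);
                ret = 0;
        out_pm_put:
                /* mirror of etnaviv_gpu_pm_put(): drop references taken earlier */
                return ret;
        }

        int main(void)
        {
                return submit() ? 1 : 0;
        }
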
index 21f9390..f68c789 100644 (file)
@@ -882,11 +882,12 @@ struct i915_gem_context {
 
        struct i915_ctx_hang_stats hang_stats;
 
-       /* Unique identifier for this context, used by the hw for tracking */
        unsigned long flags;
 #define CONTEXT_NO_ZEROMAP             BIT(0)
 #define CONTEXT_NO_ERROR_CAPTURE       BIT(1)
-       unsigned hw_id;
+
+       /* Unique identifier for this context, used by the hw for tracking */
+       unsigned int hw_id;
        u32 user_handle;
 
        u32 ggtt_alignment;
@@ -1854,6 +1855,7 @@ struct drm_i915_private {
        enum modeset_restore modeset_restore;
        struct mutex modeset_restore_lock;
        struct drm_atomic_state *modeset_restore_state;
+       struct drm_modeset_acquire_ctx reset_ctx;
 
        struct list_head vm_list; /* Global list of all address spaces */
        struct i915_ggtt ggtt; /* VM representing the global address space */
@@ -1962,6 +1964,13 @@ struct drm_i915_private {
        struct i915_suspend_saved_registers regfile;
        struct vlv_s0ix_state vlv_s0ix_state;
 
+       enum {
+               I915_SKL_SAGV_UNKNOWN = 0,
+               I915_SKL_SAGV_DISABLED,
+               I915_SKL_SAGV_ENABLED,
+               I915_SKL_SAGV_NOT_CONTROLLED
+       } skl_sagv_status;
+
        struct {
                /*
                 * Raw watermark latency values:
@@ -3590,6 +3599,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
 {
+       wmb();
        if (INTEL_GEN(dev_priv) < 6)
                intel_gtt_chipset_flush();
 }
index 1168150..a77ce99 100644 (file)
@@ -879,9 +879,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
        ret = i915_gem_shmem_pread(dev, obj, args, file);
 
        /* pread for non shmem backed objects */
-       if (ret == -EFAULT || ret == -ENODEV)
+       if (ret == -EFAULT || ret == -ENODEV) {
+               intel_runtime_pm_get(to_i915(dev));
                ret = i915_gem_gtt_pread(dev, obj, args->size,
                                        args->offset, args->data_ptr);
+               intel_runtime_pm_put(to_i915(dev));
+       }
 
 out:
        drm_gem_object_unreference(&obj->base);
@@ -1306,7 +1309,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                 * textures). Fallback to the shmem path in that case. */
        }
 
-       if (ret == -EFAULT) {
+       if (ret == -EFAULT || ret == -ENOSPC) {
                if (obj->phys_handle)
                        ret = i915_gem_phys_pwrite(obj, args, file);
                else if (i915_gem_object_has_struct_page(obj))
@@ -3169,6 +3172,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
        }
 
        intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
+       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -3186,6 +3191,7 @@ void i915_gem_reset(struct drm_device *dev)
 
        for_each_engine(engine, dev_priv)
                i915_gem_reset_engine_cleanup(engine);
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
        i915_gem_context_reset(dev);
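
Bracketing the GTT pread fallback in intel_runtime_pm_get()/put() guarantees the device stays awake across the aperture access. The same bracketing idiom, modelled with a toy refcount rather than the real runtime-PM API:

        #include <stdio.h>

        static int rpm_refcount;

        static void rpm_get(void)
        {
                if (rpm_refcount++ == 0)
                        puts("device powered up");
        }

        static void rpm_put(void)
        {
                if (--rpm_refcount == 0)
                        puts("device may sleep again");
        }

        static int gtt_pread(void)
        {
                /* The aperture access needs the device awake: bracket it. */
                rpm_get();
                /* ... MMIO / GTT reads happen here ... */
                rpm_put();
                return 0;
        }

        int main(void)
        {
                return gtt_pread();
        }
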
 
index 1978633..b35e5b6 100644 (file)
@@ -943,8 +943,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 {
        const unsigned other_rings = ~intel_engine_flag(req->engine);
        struct i915_vma *vma;
-       uint32_t flush_domains = 0;
-       bool flush_chipset = false;
        int ret;
 
        list_for_each_entry(vma, vmas, exec_list) {
@@ -957,16 +955,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
                }
 
                if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-                       flush_chipset |= i915_gem_clflush_object(obj, false);
-
-               flush_domains |= obj->base.write_domain;
+                       i915_gem_clflush_object(obj, false);
        }
 
-       if (flush_chipset)
-               i915_gem_chipset_flush(req->engine->i915);
-
-       if (flush_domains & I915_GEM_DOMAIN_GTT)
-               wmb();
+       /* Unconditionally flush any chipset caches (for streaming writes). */
+       i915_gem_chipset_flush(req->engine->i915);
 
        /* Unconditionally invalidate gpu caches and ensure that we do flush
         * any residual writes from the previous batch.
index 10f1e32..7a30af7 100644 (file)
@@ -2873,6 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev)
                struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
                ppgtt->base.cleanup(&ppgtt->base);
+               kfree(ppgtt);
        }
 
        i915_gem_cleanup_stolen(dev);
index ce14fe0..bf2cad3 100644 (file)
@@ -1536,6 +1536,7 @@ enum skl_disp_power_wells {
 #define BALANCE_LEG_MASK(port)         (7<<(8+3*(port)))
 /* Balance leg disable bits */
 #define BALANCE_LEG_DISABLE_SHIFT      23
+#define BALANCE_LEG_DISABLE(port)      (1 << (23 + (port)))
 
 /*
  * Fence registers
@@ -7144,6 +7145,15 @@ enum {
 
 #define GEN6_PCODE_MAILBOX                     _MMIO(0x138124)
 #define   GEN6_PCODE_READY                     (1<<31)
+#define   GEN6_PCODE_ERROR_MASK                        0xFF
+#define     GEN6_PCODE_SUCCESS                 0x0
+#define     GEN6_PCODE_ILLEGAL_CMD             0x1
+#define     GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2
+#define     GEN6_PCODE_TIMEOUT                 0x3
+#define     GEN6_PCODE_UNIMPLEMENTED_CMD       0xFF
+#define     GEN7_PCODE_TIMEOUT                 0x2
+#define     GEN7_PCODE_ILLEGAL_DATA            0x3
+#define     GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10
 #define          GEN6_PCODE_WRITE_RC6VIDS              0x4
 #define          GEN6_PCODE_READ_RC6VIDS               0x5
 #define     GEN6_ENCODE_RC6_VID(mv)            (((mv) - 245) / 5)
@@ -7165,6 +7175,10 @@ enum {
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ          0x17
 #define   DISPLAY_IPS_CONTROL                  0x19
 #define          HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL  0x1A
+#define   GEN9_PCODE_SAGV_CONTROL              0x21
+#define     GEN9_SAGV_DISABLE                  0x0
+#define     GEN9_SAGV_IS_DISABLED              0x1
+#define     GEN9_SAGV_ENABLE                   0x3
 #define GEN6_PCODE_DATA                                _MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT       8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT     16
index 6700a7b..d32f586 100644 (file)
@@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
        if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
                return;
 
+       i915_audio_component_get_power(dev);
+
        /*
         * Enable/disable generating the codec wake signal, overriding the
         * internal logic to generate the codec wake to the controller.
@@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
                I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
                usleep_range(1000, 1500);
        }
+
+       i915_audio_component_put_power(dev);
 }
 
 /* Get CDCLK in kHz  */
@@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
            !IS_HASWELL(dev_priv))
                return 0;
 
+       i915_audio_component_get_power(dev);
        mutex_lock(&dev_priv->av_mutex);
        /* 1. get the pipe */
        intel_encoder = dev_priv->dig_port_map[port];
@@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
 
  unlock:
        mutex_unlock(&dev_priv->av_mutex);
+       i915_audio_component_put_power(dev);
        return err;
 }
 
index 3edb958..c3b33a1 100644 (file)
  * be moved to FW_FAILED.
  */
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
 #define KBL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 1)
 
-#define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
+#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
-#define SKL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 23)
+#define SKL_CSR_VERSION_REQUIRED       CSR_VERSION(1, 26)
 
-#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
+#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin"
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED       CSR_VERSION(1, 7)
 
index dd1d6fe..1a7efac 100644 (file)
@@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
 static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
        { 0x0000201B, 0x000000A2, 0x0 },
        { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x1 },
        { 0x80009010, 0x000000C0, 0x1 },
        { 0x0000201B, 0x0000009D, 0x0 },
        { 0x80005012, 0x000000C0, 0x1 },
@@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
 static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
        { 0x00000018, 0x000000A2, 0x0 },
        { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x3 },
        { 0x80009010, 0x000000C0, 0x3 },
        { 0x00000018, 0x0000009D, 0x0 },
        { 0x80005012, 0x000000C0, 0x3 },
@@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
        }
 }
 
+static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
+{
+       int n_hdmi_entries;
+       int hdmi_level;
+       int hdmi_default_entry;
+
+       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+
+       if (IS_BROXTON(dev_priv))
+               return hdmi_level;
+
+       if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+               skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
+               hdmi_default_entry = 8;
+       } else if (IS_BROADWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       } else if (IS_HASWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
+               hdmi_default_entry = 6;
+       } else {
+               WARN(1, "ddi translation table missing\n");
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       }
+
+       /* Choose a good default if VBT is badly populated */
+       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+           hdmi_level >= n_hdmi_entries)
+               hdmi_level = hdmi_default_entry;
+
+       return hdmi_level;
+}
+
 /*
  * Starting with Haswell, DDI port buffers must be programmed with correct
  * values in advance. The buffer values are different for FDI and DP modes,
@@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        u32 iboost_bit = 0;
-       int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
+       int i, n_hdmi_entries, n_dp_entries, n_edp_entries,
            size;
        int hdmi_level;
        enum port port;
@@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
        const struct ddi_buf_trans *ddi_translations;
 
        port = intel_ddi_get_encoder_port(encoder);
-       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+       hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
 
        if (IS_BROXTON(dev_priv)) {
                if (encoder->type != INTEL_OUTPUT_HDMI)
@@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                                skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
                ddi_translations_hdmi =
                                skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
-               hdmi_default_entry = 8;
                /* If we're boosting the current, set bit 31 of trans1 */
                if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
                    dev_priv->vbt.ddi_port_info[port].dp_boost_level)
@@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
 
                n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
        } else if (IS_HASWELL(dev_priv)) {
                ddi_translations_fdi = hsw_ddi_translations_fdi;
                ddi_translations_dp = hsw_ddi_translations_dp;
@@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                ddi_translations_hdmi = hsw_ddi_translations_hdmi;
                n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
-               hdmi_default_entry = 6;
        } else {
                WARN(1, "ddi translation table missing\n");
                ddi_translations_edp = bdw_ddi_translations_dp;
@@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
                n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
        }
 
        switch (encoder->type) {
@@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
        if (encoder->type != INTEL_OUTPUT_HDMI)
                return;
 
-       /* Choose a good default if VBT is badly populated */
-       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
-           hdmi_level >= n_hdmi_entries)
-               hdmi_level = hdmi_default_entry;
-
        /* Entry 9 is for HDMI: */
        I915_WRITE(DDI_BUF_TRANS_LO(port, i),
                   ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
@@ -1379,14 +1404,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
                           TRANS_CLK_SEL_DISABLED);
 }
 
-static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
-                              u32 level, enum port port, int type)
+static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+                               enum port port, uint8_t iboost)
 {
+       u32 tmp;
+
+       tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
+       tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
+       if (iboost)
+               tmp |= iboost << BALANCE_LEG_SHIFT(port);
+       else
+               tmp |= BALANCE_LEG_DISABLE(port);
+       I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
+}
+
+static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
+{
+       struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+       struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
+       enum port port = intel_dig_port->port;
+       int type = encoder->type;
        const struct ddi_buf_trans *ddi_translations;
        uint8_t iboost;
        uint8_t dp_iboost, hdmi_iboost;
        int n_entries;
-       u32 reg;
 
        /* VBT may override standard boost values */
        dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
@@ -1428,16 +1469,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
                return;
        }
 
-       reg = I915_READ(DISPIO_CR_TX_BMU_CR0);
-       reg &= ~BALANCE_LEG_MASK(port);
-       reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
-
-       if (iboost)
-               reg |= iboost << BALANCE_LEG_SHIFT(port);
-       else
-               reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
+       _skl_ddi_set_iboost(dev_priv, port, iboost);
 
-       I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
+       if (port == PORT_A && intel_dig_port->max_lanes == 4)
+               _skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
 }
 
 static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
@@ -1568,7 +1603,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
        level = translate_signal_level(signal_levels);
 
        if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-               skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
+               skl_ddi_set_iboost(encoder, level);
        else if (IS_BROXTON(dev_priv))
                bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
 
@@ -1637,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
                        intel_dp_stop_link_train(intel_dp);
        } else if (type == INTEL_OUTPUT_HDMI) {
                struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+               int level = intel_ddi_hdmi_level(dev_priv, port);
+
+               if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+                       skl_ddi_set_iboost(intel_encoder, level);
 
                intel_hdmi->set_infoframes(encoder,
                                           crtc->config->has_hdmi_sink,
index dcf93b3..175595f 100644 (file)
@@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev)
 
        for_each_crtc(dev, crtc) {
                struct intel_plane *plane = to_intel_plane(crtc->primary);
-               struct intel_plane_state *plane_state;
-
-               drm_modeset_lock_crtc(crtc, &plane->base);
-               plane_state = to_intel_plane_state(plane->base.state);
+               struct intel_plane_state *plane_state =
+                       to_intel_plane_state(plane->base.state);
 
                if (plane_state->visible)
                        plane->update_plane(&plane->base,
                                            to_intel_crtc_state(crtc->state),
                                            plane_state);
+       }
+}
+
+static int
+__intel_display_resume(struct drm_device *dev,
+                      struct drm_atomic_state *state)
+{
+       struct drm_crtc_state *crtc_state;
+       struct drm_crtc *crtc;
+       int i, ret;
+
+       intel_modeset_setup_hw_state(dev);
+       i915_redisable_vga(dev);
 
-               drm_modeset_unlock_crtc(crtc);
+       if (!state)
+               return 0;
+
+       for_each_crtc_in_state(state, crtc, crtc_state, i) {
+               /*
+                * Force recalculation even if we restore
+                * current state. With fast modeset this may not result
+                * in a modeset when the state is compatible.
+                */
+               crtc_state->mode_changed = true;
        }
+
+       /* ignore any reset values/BIOS leftovers in the WM registers */
+       to_intel_atomic_state(state)->skip_intermediate_wm = true;
+
+       ret = drm_atomic_commit(state);
+
+       WARN_ON(ret == -EDEADLK);
+       return ret;
 }
 
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state;
+       int ret;
+
        /* no reset support for gen2 */
        if (IS_GEN2(dev_priv))
                return;
 
-       /* reset doesn't touch the display */
+       /*
+        * Need mode_config.mutex so that we don't
+        * trample ongoing ->detect() and whatnot.
+        */
+       mutex_lock(&dev->mode_config.mutex);
+       drm_modeset_acquire_init(ctx, 0);
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, ctx);
+               if (ret != -EDEADLK)
+                       break;
+
+               drm_modeset_backoff(ctx);
+       }
+
+       /* reset doesn't touch the display, but flips might get nuked anyway */
        if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
                return;
 
-       drm_modeset_lock_all(&dev_priv->drm);
        /*
         * Disabling the crtcs gracefully seems nicer. Also the
         * g33 docs say we should at least disable all the planes.
         */
-       intel_display_suspend(&dev_priv->drm);
+       state = drm_atomic_helper_duplicate_state(dev, ctx);
+       if (IS_ERR(state)) {
+               ret = PTR_ERR(state);
+               state = NULL;
+               DRM_ERROR("Duplicating state failed with %i\n", ret);
+               goto err;
+       }
+
+       ret = drm_atomic_helper_disable_all(dev, ctx);
+       if (ret) {
+               DRM_ERROR("Suspending crtcs failed with %i\n", ret);
+               goto err;
+       }
+
+       dev_priv->modeset_restore_state = state;
+       state->acquire_ctx = ctx;
+       return;
+
+err:
+       drm_atomic_state_free(state);
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+       int ret;
+
        /*
         * Flips in the rings will be nuked by the reset,
         * so complete all pending flips so that user space
@@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
        if (IS_GEN2(dev_priv))
                return;
 
+       dev_priv->modeset_restore_state = NULL;
+
        /* reset doesn't touch the display */
        if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
                /*
@@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
                 * FIXME: Atomic will make this obsolete since we won't schedule
                 * CS-based flips (which might get lost in gpu resets) any more.
                 */
-               intel_update_primary_planes(&dev_priv->drm);
-               return;
-       }
-
-       /*
-        * The display has been reset as well,
-        * so need a full re-initialization.
-        */
-       intel_runtime_pm_disable_interrupts(dev_priv);
-       intel_runtime_pm_enable_interrupts(dev_priv);
+               intel_update_primary_planes(dev);
+       } else {
+               /*
+                * The display has been reset as well,
+                * so need a full re-initialization.
+                */
+               intel_runtime_pm_disable_interrupts(dev_priv);
+               intel_runtime_pm_enable_interrupts(dev_priv);
 
-       intel_modeset_init_hw(&dev_priv->drm);
+               intel_modeset_init_hw(dev);
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       if (dev_priv->display.hpd_irq_setup)
-               dev_priv->display.hpd_irq_setup(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->display.hpd_irq_setup)
+                       dev_priv->display.hpd_irq_setup(dev_priv);
+               spin_unlock_irq(&dev_priv->irq_lock);
 
-       intel_display_resume(&dev_priv->drm);
+               ret = __intel_display_resume(dev, state);
+               if (ret)
+                       DRM_ERROR("Restoring old state failed with %i\n", ret);
 
-       intel_hpd_init(dev_priv);
+               intel_hpd_init(dev_priv);
+       }
 
-       drm_modeset_unlock_all(&dev_priv->drm);
+       drm_modeset_drop_locks(ctx);
+       drm_modeset_acquire_fini(ctx);
+       mutex_unlock(&dev->mode_config.mutex);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -13684,6 +13759,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
                     intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco))
                        dev_priv->display.modeset_commit_cdclk(state);
 
+               /*
+                * SKL workaround: bspec recommends we disable the SAGV when we
+                * have more than one pipe enabled
+                */
+               if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state))
+                       skl_disable_sagv(dev_priv);
+
                intel_modeset_verify_disabled(dev);
        }
 
@@ -13757,6 +13839,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
                intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
        }
 
+       if (IS_SKYLAKE(dev_priv) && intel_state->modeset &&
+           skl_can_enable_sagv(state))
+               skl_enable_sagv(dev_priv);
+
        drm_atomic_helper_commit_hw_done(state);
 
        if (intel_state->modeset)
@@ -16156,9 +16242,10 @@ void intel_display_resume(struct drm_device *dev)
        struct drm_atomic_state *state = dev_priv->modeset_restore_state;
        struct drm_modeset_acquire_ctx ctx;
        int ret;
-       bool setup = false;
 
        dev_priv->modeset_restore_state = NULL;
+       if (state)
+               state->acquire_ctx = &ctx;
 
        /*
         * This is a kludge because with real atomic modeset mode_config.mutex
@@ -16169,43 +16256,17 @@ void intel_display_resume(struct drm_device *dev)
        mutex_lock(&dev->mode_config.mutex);
        drm_modeset_acquire_init(&ctx, 0);
 
-retry:
-       ret = drm_modeset_lock_all_ctx(dev, &ctx);
-
-       if (ret == 0 && !setup) {
-               setup = true;
-
-               intel_modeset_setup_hw_state(dev);
-               i915_redisable_vga(dev);
-       }
-
-       if (ret == 0 && state) {
-               struct drm_crtc_state *crtc_state;
-               struct drm_crtc *crtc;
-               int i;
-
-               state->acquire_ctx = &ctx;
-
-               /* ignore any reset values/BIOS leftovers in the WM registers */
-               to_intel_atomic_state(state)->skip_intermediate_wm = true;
-
-               for_each_crtc_in_state(state, crtc, crtc_state, i) {
-                       /*
-                        * Force recalculation even if we restore
-                        * current state. With fast modeset this may not result
-                        * in a modeset when the state is compatible.
-                        */
-                       crtc_state->mode_changed = true;
-               }
-
-               ret = drm_atomic_commit(state);
-       }
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, &ctx);
+               if (ret != -EDEADLK)
+                       break;
 
-       if (ret == -EDEADLK) {
                drm_modeset_backoff(&ctx);
-               goto retry;
        }
 
+       if (!ret)
+               ret = __intel_display_resume(dev, state);
+
        drm_modeset_drop_locks(&ctx);
        drm_modeset_acquire_fini(&ctx);
        mutex_unlock(&dev->mode_config.mutex);
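
Both intel_prepare_reset() and intel_display_resume() now use the same acquire-context loop: try to take every modeset lock, and on -EDEADLK drop them all, back off, and retry instead of deadlocking against a concurrent commit. The loop's shape, with stand-ins for the DRM helpers:

        #include <errno.h>
        #include <stdio.h>

        /* Stand-ins for drm_modeset_lock_all_ctx() and drm_modeset_backoff():
         * here the locks "succeed" on the third try. */
        static int attempts;

        static int lock_all(void)
        {
                return ++attempts < 3 ? -EDEADLK : 0;
        }

        static void backoff(void)
        {
                puts("dropping locks, waiting for the contender");
        }

        int main(void)
        {
                int ret;

                /* Retry until we hold every lock or hit a real error;
                 * -EDEADLK only means "drop everything and restart". */
                while (1) {
                        ret = lock_all();
                        if (ret != -EDEADLK)
                                break;
                        backoff();
                }
                printf("ret = %d after %d attempts\n", ret, attempts);
                return 0;
        }
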
index cc937a1..ff399b9 100644 (file)
@@ -1716,6 +1716,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev);
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
                          struct skl_ddb_allocation *ddb /* out */);
+bool skl_can_enable_sagv(struct drm_atomic_state *state);
+int skl_enable_sagv(struct drm_i915_private *dev_priv);
+int skl_disable_sagv(struct drm_i915_private *dev_priv);
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
 bool ilk_disable_lp_wm(struct drm_device *dev);
 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
index 6a7ad3e..3836a1c 100644 (file)
@@ -1230,12 +1230,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
        if (i915.enable_fbc >= 0)
                return !!i915.enable_fbc;
 
+       if (!HAS_FBC(dev_priv))
+               return 0;
+
        if (IS_BROADWELL(dev_priv))
                return 1;
 
        return 0;
 }
 
+static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_INTEL_IOMMU
+       /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
+       if (intel_iommu_gfx_mapped &&
+           (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
+               DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
+               return true;
+       }
+#endif
+
+       return false;
+}
+
 /**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
@@ -1253,6 +1270,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
        fbc->active = false;
        fbc->work.scheduled = false;
 
+       if (need_fbc_vtd_wa(dev_priv))
+               mkwrite_device_info(dev_priv)->has_fbc = false;
+
        i915.enable_fbc = intel_sanitize_fbc_option(dev_priv);
        DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc);
 
index 97ba6c8..53e13c1 100644 (file)
@@ -2852,6 +2852,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
 
 #define SKL_DDB_SIZE           896     /* in blocks */
 #define BXT_DDB_SIZE           512
+#define SKL_SAGV_BLOCK_TIME    30 /* µs */
 
 /*
  * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
@@ -2875,6 +2876,153 @@ skl_wm_plane_id(const struct intel_plane *plane)
        }
 }
 
+/*
+ * SAGV dynamically adjusts the system agent voltage and clock frequencies
+ * depending on power and performance requirements. The display engine access
+ * to system memory is blocked during the adjustment time. Because of the
+ * blocking time, having this enabled can cause full system hangs and/or pipe
+ * underruns if we don't meet all of the following requirements:
+ *
+ *  - <= 1 pipe enabled
+ *  - All planes can enable watermarks for latencies >= SAGV engine block time
+ *  - We're not using an interlaced display configuration
+ */
+int
+skl_enable_sagv(struct drm_i915_private *dev_priv)
+{
+       int ret;
+
+       if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+           dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED)
+               return 0;
+
+       DRM_DEBUG_KMS("Enabling the SAGV\n");
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+                                     GEN9_SAGV_ENABLE);
+
+       /* We don't need to wait for the SAGV when enabling */
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       /*
+        * Some skl systems, pre-release machines in particular,
+        * don't actually have an SAGV.
+        */
+       if (ret == -ENXIO) {
+               DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+               dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+               return 0;
+       } else if (ret < 0) {
+               DRM_ERROR("Failed to enable the SAGV\n");
+               return ret;
+       }
+
+       dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED;
+       return 0;
+}
+
+static int
+skl_do_sagv_disable(struct drm_i915_private *dev_priv)
+{
+       int ret;
+       uint32_t temp = GEN9_SAGV_DISABLE;
+
+       ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+                                    &temp);
+       if (ret)
+               return ret;
+       else
+               return temp & GEN9_SAGV_IS_DISABLED;
+}
+
+int
+skl_disable_sagv(struct drm_i915_private *dev_priv)
+{
+       int ret, result;
+
+       if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+           dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED)
+               return 0;
+
+       DRM_DEBUG_KMS("Disabling the SAGV\n");
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       /* bspec says to keep retrying for at least 1 ms */
+       ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1);
+       mutex_unlock(&dev_priv->rps.hw_lock);
+
+       if (ret == -ETIMEDOUT) {
+               DRM_ERROR("Request to disable SAGV timed out\n");
+               return -ETIMEDOUT;
+       }
+
+       /*
+        * Some skl systems, pre-release machines in particular,
+        * don't actually have an SAGV.
+        */
+       if (result == -ENXIO) {
+               DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+               dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+               return 0;
+       } else if (result < 0) {
+               DRM_ERROR("Failed to disable the SAGV\n");
+               return result;
+       }
+
+       dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED;
+       return 0;
+}
+
+bool skl_can_enable_sagv(struct drm_atomic_state *state)
+{
+       struct drm_device *dev = state->dev;
+       struct drm_i915_private *dev_priv = to_i915(dev);
+       struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+       struct drm_crtc *crtc;
+       enum pipe pipe;
+       int level, plane;
+
+       /*
+        * SKL workaround: bspec recommends we disable the SAGV when we have
+        * more than one pipe enabled
+        *
+        * If there are no active CRTCs, no additional checks need be performed
+        */
+       if (hweight32(intel_state->active_crtcs) == 0)
+               return true;
+       else if (hweight32(intel_state->active_crtcs) > 1)
+               return false;
+
+       /* Since we're now guaranteed to only have one active CRTC... */
+       pipe = ffs(intel_state->active_crtcs) - 1;
+       crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+
+       if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE)
+               return false;
+
+       for_each_plane(dev_priv, pipe, plane) {
+               /* Skip this plane if it's not enabled */
+               if (intel_state->wm_results.plane[pipe][plane][0] == 0)
+                       continue;
+
+               /* Find the highest enabled wm level for this plane */
+               for (level = ilk_wm_max_level(dev);
+                    intel_state->wm_results.plane[pipe][plane][level] == 0; --level)
+                    { }
+
+               /*
+                * If any of the planes on this pipe don't enable wm levels
+                * that incur memory latencies higher than 30µs, we can't
+                * enable the SAGV.
+                */
+               if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME)
+                       return false;
+       }
+
+       return true;
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
                                   const struct intel_crtc_state *cstate,
@@ -3107,8 +3255,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
                total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
        }
 
-       WARN_ON(cstate->plane_mask && total_data_rate == 0);
-
        return total_data_rate;
 }
 
@@ -3344,6 +3490,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
                plane_bytes_per_line *= 4;
                plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
                plane_blocks_per_line /= 4;
+       } else if (tiling == DRM_FORMAT_MOD_NONE) {
+               plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
        } else {
                plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
        }
@@ -3910,9 +4058,24 @@ skl_compute_ddb(struct drm_atomic_state *state)
         * pretend that all pipes switched active status so that we'll
         * ensure a full DDB recompute.
         */
-       if (dev_priv->wm.distrust_bios_wm)
+       if (dev_priv->wm.distrust_bios_wm) {
+               ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+                                      state->acquire_ctx);
+               if (ret)
+                       return ret;
+
                intel_state->active_pipe_changes = ~0;
 
+               /*
+                * We usually only initialize intel_state->active_crtcs if
+                * we're doing a modeset; make sure this field is always
+                * initialized during the sanitization process that happens
+                * on the first commit too.
+                */
+               if (!intel_state->modeset)
+                       intel_state->active_crtcs = dev_priv->active_crtcs;
+       }
+
        /*
         * If the modeset changes which CRTC's are active, we need to
         * recompute the DDB allocation for *all* active pipes, even
@@ -3941,11 +4104,33 @@ skl_compute_ddb(struct drm_atomic_state *state)
                ret = skl_allocate_pipe_ddb(cstate, ddb);
                if (ret)
                        return ret;
+
+               ret = drm_atomic_add_affected_planes(state, &intel_crtc->base);
+               if (ret)
+                       return ret;
        }
 
        return 0;
 }
 
+static void
+skl_copy_wm_for_pipe(struct skl_wm_values *dst,
+                    struct skl_wm_values *src,
+                    enum pipe pipe)
+{
+       dst->wm_linetime[pipe] = src->wm_linetime[pipe];
+       memcpy(dst->plane[pipe], src->plane[pipe],
+              sizeof(dst->plane[pipe]));
+       memcpy(dst->plane_trans[pipe], src->plane_trans[pipe],
+              sizeof(dst->plane_trans[pipe]));
+
+       dst->ddb.pipe[pipe] = src->ddb.pipe[pipe];
+       memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
+              sizeof(dst->ddb.y_plane[pipe]));
+       memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
+              sizeof(dst->ddb.plane[pipe]));
+}
+
 static int
 skl_compute_wm(struct drm_atomic_state *state)
 {
@@ -4018,8 +4203,10 @@ static void skl_update_wm(struct drm_crtc *crtc)
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct skl_wm_values *results = &dev_priv->wm.skl_results;
+       struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
        struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
        struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
+       int pipe;
 
        if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
                return;
@@ -4031,8 +4218,12 @@ static void skl_update_wm(struct drm_crtc *crtc)
        skl_write_wm_values(dev_priv, results);
        skl_flush_wm_values(dev_priv, results);
 
-       /* store the new configuration */
-       dev_priv->wm.skl_hw = *results;
+       /*
+        * Store the new configuration (but only for the pipes that have
+        * changed; the other values weren't recomputed).
+        */
+       for_each_pipe_masked(dev_priv, pipe, results->dirty_pipes)
+               skl_copy_wm_for_pipe(hw_vals, results, pipe);
 
        mutex_unlock(&dev_priv->wm.wm_mutex);
 }
@@ -6574,9 +6765,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
 {
-       if (IS_CHERRYVIEW(dev_priv))
-               return;
-       else if (IS_VALLEYVIEW(dev_priv))
+       if (IS_VALLEYVIEW(dev_priv))
                valleyview_cleanup_gt_powersave(dev_priv);
 
        if (!i915.enable_rc6)
@@ -7658,8 +7847,53 @@ void intel_init_pm(struct drm_device *dev)
        }
 }
 
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+       uint32_t flags =
+               I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+       switch (flags) {
+       case GEN6_PCODE_SUCCESS:
+               return 0;
+       case GEN6_PCODE_UNIMPLEMENTED_CMD:
+       case GEN6_PCODE_ILLEGAL_CMD:
+               return -ENXIO;
+       case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+               return -EOVERFLOW;
+       case GEN6_PCODE_TIMEOUT:
+               return -ETIMEDOUT;
+       default:
+               MISSING_CASE(flags);
+               return 0;
+       }
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+       uint32_t flags =
+               I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+       switch (flags) {
+       case GEN6_PCODE_SUCCESS:
+               return 0;
+       case GEN6_PCODE_ILLEGAL_CMD:
+               return -ENXIO;
+       case GEN7_PCODE_TIMEOUT:
+               return -ETIMEDOUT;
+       case GEN7_PCODE_ILLEGAL_DATA:
+               return -EINVAL;
+       case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+               return -EOVERFLOW;
+       default:
+               MISSING_CASE(flags);
+               return 0;
+       }
+}
+
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
+       int status;
+
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        /* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7686,12 +7920,25 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
        *val = I915_READ_FW(GEN6_PCODE_DATA);
        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+       if (INTEL_GEN(dev_priv) > 6)
+               status = gen7_check_mailbox_status(dev_priv);
+       else
+               status = gen6_check_mailbox_status(dev_priv);
+
+       if (status) {
+               DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
+                                status);
+               return status;
+       }
+
        return 0;
 }
 
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
-                              u32 mbox, u32 val)
+                           u32 mbox, u32 val)
 {
+       int status;
+
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
        /* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7716,6 +7963,17 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
 
        I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+       if (INTEL_GEN(dev_priv) > 6)
+               status = gen7_check_mailbox_status(dev_priv);
+       else
+               status = gen6_check_mailbox_status(dev_priv);
+
+       if (status) {
+               DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
+                                status);
+               return status;
+       }
+
        return 0;
 }
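
The two mailbox-status helpers translate the pcode error field into errno values, so sandybridge_pcode_read()/write() callers see a real failure instead of silently bogus data. A condensed, compilable version of the mapping (a subset of the codes above; where the kernel would WARN via MISSING_CASE(), this just prints):

        #include <errno.h>
        #include <stdio.h>

        #define PCODE_SUCCESS     0x0
        #define PCODE_ILLEGAL_CMD 0x1
        #define PCODE_TIMEOUT     0x3

        static int check_mailbox_status(unsigned int flags)
        {
                switch (flags) {
                case PCODE_SUCCESS:
                        return 0;
                case PCODE_ILLEGAL_CMD:
                        return -ENXIO;
                case PCODE_TIMEOUT:
                        return -ETIMEDOUT;
                default:
                        fprintf(stderr, "unknown pcode status %#x\n", flags);
                        return 0;
                }
        }

        int main(void)
        {
                printf("%d\n", check_mailbox_status(PCODE_TIMEOUT));
                return 0;
        }
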
 
index cca7792..1d3161b 100644 (file)
@@ -1178,8 +1178,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
                I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));
 
-       /* WaInsertDummyPushConstPs:bxt */
-       if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:bxt */
+       if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
@@ -1222,8 +1222,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
                I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                           GEN8_LQSC_RO_PERF_DIS);
 
-       /* WaInsertDummyPushConstPs:kbl */
-       if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:kbl */
+       if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
index 9f7dafc..7bf90e9 100644 (file)
@@ -171,10 +171,34 @@ static void imx_drm_output_poll_changed(struct drm_device *drm)
        drm_fbdev_cma_hotplug_event(imxdrm->fbhelper);
 }
 
+static int imx_drm_atomic_check(struct drm_device *dev,
+                               struct drm_atomic_state *state)
+{
+       int ret;
+
+       ret = drm_atomic_helper_check_modeset(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_helper_check_planes(dev, state);
+       if (ret)
+               return ret;
+
+       /*
+        * Check modeset again in case crtc_state->mode_changed is
+        * updated in a plane's ->atomic_check callback.
+        */
+       return drm_atomic_helper_check_modeset(dev, state);
+}
+
 static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
        .fb_create = drm_fb_cma_create,
        .output_poll_changed = imx_drm_output_poll_changed,
-       .atomic_check = drm_atomic_helper_check,
+       .atomic_check = imx_drm_atomic_check,
        .atomic_commit = drm_atomic_helper_commit,
 };
 
index 08e188b..462056e 100644 (file)
@@ -76,6 +76,8 @@ static void ipu_crtc_disable(struct drm_crtc *crtc)
                crtc->state->event = NULL;
        }
        spin_unlock_irq(&crtc->dev->event_lock);
+
+       drm_crtc_vblank_off(crtc);
 }
 
 static void imx_drm_crtc_reset(struct drm_crtc *crtc)
@@ -175,6 +177,8 @@ static int ipu_crtc_atomic_check(struct drm_crtc *crtc,
 static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
                                  struct drm_crtc_state *old_crtc_state)
 {
+       drm_crtc_vblank_on(crtc);
+
        spin_lock_irq(&crtc->dev->event_lock);
        if (crtc->state->event) {
                WARN_ON(drm_crtc_vblank_get(crtc));
index 4ad67d0..29423e7 100644 (file)
@@ -319,13 +319,14 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
                return -EINVAL;
 
        /*
-        * since we cannot touch active IDMAC channels, we do not support
-        * resizing the enabled plane or changing its format
+        * We support resizing an active plane or changing its format by
+        * forcing a CRTC mode change and disabling/re-enabling the plane
+        * in the plane's ->atomic_update callback.
         */
        if (old_fb && (state->src_w != old_state->src_w ||
                              state->src_h != old_state->src_h ||
                              fb->pixel_format != old_fb->pixel_format))
-               return -EINVAL;
+               crtc_state->mode_changed = true;
 
        eba = drm_plane_state_to_eba(state);
 
@@ -336,7 +337,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
                return -EINVAL;
 
        if (old_fb && fb->pitches[0] != old_fb->pitches[0])
-               return -EINVAL;
+               crtc_state->mode_changed = true;
 
        switch (fb->pixel_format) {
        case DRM_FORMAT_YUV420:
@@ -372,7 +373,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
                        return -EINVAL;
 
                if (old_fb && old_fb->pitches[1] != fb->pitches[1])
-                       return -EINVAL;
+                       crtc_state->mode_changed = true;
        }
 
        return 0;
@@ -392,8 +393,14 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
        enum ipu_color_space ics;
 
        if (old_state->fb) {
-               ipu_plane_atomic_set_base(ipu_plane, old_state);
-               return;
+               struct drm_crtc_state *crtc_state = state->crtc->state;
+
+               if (!crtc_state->mode_changed) {
+                       ipu_plane_atomic_set_base(ipu_plane, old_state);
+                       return;
+               }
+
+               ipu_disable_plane(plane);
        }
 
        switch (ipu_plane->dp_flow) {
index 23ac804..294de45 100644 (file)
@@ -2,6 +2,9 @@ config DRM_MEDIATEK
        tristate "DRM Support for Mediatek SoCs"
        depends on DRM
        depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST)
+       depends on COMMON_CLK
+       depends on HAVE_ARM_SMCCC
+       depends on OF
        select DRM_GEM_CMA_HELPER
        select DRM_KMS_HELPER
        select DRM_MIPI_DSI
index b4bc7f1..d0da52f 100644 (file)
@@ -157,6 +157,12 @@ struct msm_drm_private {
        struct shrinker shrinker;
 
        struct msm_vblank_ctrl vblank_ctrl;
+
+       /* task holding struct_mutex.. currently only used in submit path
+        * to detect and reject faults from copy_from_user() for submit
+        * ioctl.
+        */
+       struct task_struct *struct_mutex_task;
 };
 
 struct msm_format {
index 6cd4af4..85f3047 100644 (file)
@@ -196,11 +196,20 @@ int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct drm_gem_object *obj = vma->vm_private_data;
        struct drm_device *dev = obj->dev;
+       struct msm_drm_private *priv = dev->dev_private;
        struct page **pages;
        unsigned long pfn;
        pgoff_t pgoff;
        int ret;
 
+       /* This should only happen if userspace tries to pass a mmap'd
+        * but unfaulted gem bo vaddr into submit ioctl, triggering
+        * a page fault while struct_mutex is already held.  This is
+        * not a valid use-case so just bail.
+        */
+       if (priv->struct_mutex_task == current)
+               return VM_FAULT_SIGBUS;
+
        /* Make sure we don't parallel update on a fault, nor move or remove
         * something from beneath our feet
         */
index 9766f9a..880d6a9 100644 (file)
@@ -64,6 +64,14 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
        kfree(submit);
 }
 
+static inline unsigned long __must_check
+copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               return __copy_from_user_inatomic(to, from, n);
+       return -EFAULT;
+}
+
 static int submit_lookup_objects(struct msm_gem_submit *submit,
                struct drm_msm_gem_submit *args, struct drm_file *file)
 {
@@ -71,6 +79,7 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
        int ret = 0;
 
        spin_lock(&file->table_lock);
+       pagefault_disable();
 
        for (i = 0; i < args->nr_bos; i++) {
                struct drm_msm_gem_submit_bo submit_bo;
@@ -84,10 +93,15 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
                 */
                submit->bos[i].flags = 0;
 
-               ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
-               if (ret) {
-                       ret = -EFAULT;
-                       goto out_unlock;
+               ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
+               if (unlikely(ret)) {
+                       pagefault_enable();
+                       spin_unlock(&file->table_lock);
+                       ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+                       if (ret)
+                               goto out;
+                       spin_lock(&file->table_lock);
+                       pagefault_disable();
                }
 
                if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
@@ -127,9 +141,12 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
        }
 
 out_unlock:
-       submit->nr_bos = i;
+       pagefault_enable();
        spin_unlock(&file->table_lock);
 
+out:
+       submit->nr_bos = i;
+
        return ret;
 }
 
@@ -377,6 +394,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
        if (ret)
                return ret;
 
+       priv->struct_mutex_task = current;
+
        submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
        if (!submit) {
                ret = -ENOMEM;
@@ -468,6 +487,7 @@ out:
        if (ret)
                msm_gem_submit_free(submit);
 out_unlock:
+       priv->struct_mutex_task = NULL;
        mutex_unlock(&dev->struct_mutex);
        return ret;
 }
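
submit_lookup_objects() now copies under pagefault_disable() while holding the table spinlock; if the atomic copy faults, it drops the lock, redoes the copy with faults allowed, and re-enters the locked fast path. The retry shape in userspace (lock()/unlock() and both copy helpers are stand-ins, not kernel calls):

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>

        /* The "atomic" copy fails once, as if the page were not resident;
         * the faulting copy always succeeds. */
        static int copy_inatomic(void *dst, const void *src, size_t n)
        {
                static int first = 1;

                if (first) {
                        first = 0;
                        return -EFAULT;
                }
                memcpy(dst, src, n);
                return 0;
        }

        static int copy_faulting(void *dst, const void *src, size_t n)
        {
                memcpy(dst, src, n);
                return 0;
        }

        static void lock(void)   { /* spin_lock(); pagefault_disable(); */ }
        static void unlock(void) { /* pagefault_enable(); spin_unlock(); */ }

        int main(void)
        {
                char src[8] = "payload", dst[8];

                lock();
                if (copy_inatomic(dst, src, sizeof(src))) {
                        /* Slow path: drop the lock so the fault can be
                         * serviced, then resume the locked fast path. */
                        unlock();
                        if (copy_faulting(dst, src, sizeof(src)))
                                return 1;
                        lock();
                }
                unlock();
                puts(dst);
                return 0;
        }
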
index f2ad17a..dc57b62 100644 (file)
@@ -225,6 +225,17 @@ static bool nouveau_pr3_present(struct pci_dev *pdev)
        if (!parent_pdev)
                return false;
 
+       if (!parent_pdev->bridge_d3) {
+               /*
+                * The parent PCI bridge is currently not power managed.
+                * Since userspace can change this afterwards, stay on the
+                * safe side and stick with _DSM, preventing usage of _PR3
+                * from the bridge.
+                */
+               pci_d3cold_disable(pdev);
+               return false;
+       }
+
        parent_adev = ACPI_COMPANION(&parent_pdev->dev);
        if (!parent_adev)
                return false;
index df26570..28c1423 100644 (file)
@@ -73,10 +73,12 @@ static void qxl_fb_image_init(struct qxl_fb_image *qxl_fb_image,
        }
 }
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 static struct fb_deferred_io qxl_defio = {
        .delay          = QXL_DIRTY_DELAY,
        .deferred_io    = drm_fb_helper_deferred_io,
 };
+#endif
 
 static struct fb_ops qxlfb_ops = {
        .owner = THIS_MODULE,
@@ -313,8 +315,10 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev,
                goto out_destroy_fbi;
        }
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        info->fbdefio = &qxl_defio;
        fb_deferred_io_init(info);
+#endif
 
        qdev->fbdev_info = info;
        qdev->fbdev_qfb = &qfbdev->qfb;
index a97abc8..1dcf390 100644 (file)
@@ -627,7 +627,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                        if (radeon_crtc->ss.refdiv) {
                                radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
                                radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-                               if (rdev->family >= CHIP_RV770)
+                               if (ASIC_IS_AVIVO(rdev) &&
+                                   rdev->family != CHIP_RS780 &&
+                                   rdev->family != CHIP_RS880)
                                        radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
                        }
                }
index 6de3428..ddef0d4 100644 (file)
@@ -198,16 +198,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                atpx->functions.power_cntl = false;
-#endif
                atpx->is_hybrid = true;
        }
 
index 0c00e19..c2e0a1c 100644 (file)
@@ -263,8 +263,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 
        rdev = radeon_get_rdev(bo->bdev);
        ridx = radeon_copy_ring_index(rdev);
-       old_start = old_mem->start << PAGE_SHIFT;
-       new_start = new_mem->start << PAGE_SHIFT;
+       old_start = (u64)old_mem->start << PAGE_SHIFT;
+       new_start = (u64)new_mem->start << PAGE_SHIFT;
 
        switch (old_mem->mem_type) {
        case TTM_PL_VRAM:
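
Note: mem->start is a page number held in an unsigned long; on 32-bit kernels, shifting it by PAGE_SHIFT before widening truncates any GPU offset at or above 4 GiB. A worked example (assuming PAGE_SHIFT == 12 and a 32-bit unsigned long):

    unsigned long start = 0x100000;          /* page number 1M == 4 GiB offset */
    u64 bad  = start << PAGE_SHIFT;          /* shift done in 32 bits -> 0 */
    u64 good = (u64)start << PAGE_SHIFT;     /* widened first -> 0x100000000 */
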
index 3d228ad..3dea121 100644 (file)
@@ -840,6 +840,21 @@ static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = {
        .destroy = tegra_output_encoder_destroy,
 };
 
+static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
+{
+       int err;
+
+       if (dsi->slave)
+               tegra_dsi_unprepare(dsi->slave);
+
+       err = tegra_mipi_disable(dsi->mipi);
+       if (err < 0)
+               dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n",
+                       err);
+
+       pm_runtime_put(dsi->dev);
+}
+
 static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 {
        struct tegra_output *output = encoder_to_output(encoder);
@@ -876,7 +891,26 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 
        tegra_dsi_disable(dsi);
 
-       pm_runtime_put(dsi->dev);
+       tegra_dsi_unprepare(dsi);
+}
+
+static void tegra_dsi_prepare(struct tegra_dsi *dsi)
+{
+       int err;
+
+       pm_runtime_get_sync(dsi->dev);
+
+       err = tegra_mipi_enable(dsi->mipi);
+       if (err < 0)
+               dev_err(dsi->dev, "failed to enable MIPI calibration: %d\n",
+                       err);
+
+       err = tegra_dsi_pad_calibrate(dsi);
+       if (err < 0)
+               dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+
+       if (dsi->slave)
+               tegra_dsi_prepare(dsi->slave);
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -887,13 +921,8 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
        struct tegra_dsi *dsi = to_dsi(output);
        struct tegra_dsi_state *state;
        u32 value;
-       int err;
-
-       pm_runtime_get_sync(dsi->dev);
 
-       err = tegra_dsi_pad_calibrate(dsi);
-       if (err < 0)
-               dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+       tegra_dsi_prepare(dsi);
 
        state = tegra_dsi_get_state(dsi);
 
index d5df555..9688bfa 100644 (file)
@@ -203,6 +203,7 @@ static int udl_fb_open(struct fb_info *info, int user)
 
        ufbdev->fb_count++;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        if (fb_defio && (info->fbdefio == NULL)) {
                /* enable defio at last moment if not disabled by client */
 
@@ -218,6 +219,7 @@ static int udl_fb_open(struct fb_info *info, int user)
                info->fbdefio = fbdefio;
                fb_deferred_io_init(info);
        }
+#endif
 
        pr_notice("open /dev/fb%d user=%d fb_info=%p count=%d\n",
                  info->node, user, info, ufbdev->fb_count);
@@ -235,12 +237,14 @@ static int udl_fb_release(struct fb_info *info, int user)
 
        ufbdev->fb_count--;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
        if ((ufbdev->fb_count == 0) && (info->fbdefio)) {
                fb_deferred_io_cleanup(info);
                kfree(info->fbdefio);
                info->fbdefio = NULL;
                info->fbops->fb_mmap = udl_fb_mmap;
        }
+#endif
 
        pr_warn("released /dev/fb%d user=%d count=%d\n",
                info->node, user, ufbdev->fb_count);
index 8b42d31..9ecef93 100644 (file)
@@ -57,21 +57,21 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
        switch (args->param) {
        case DRM_VC4_PARAM_V3D_IDENT0:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT0);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT1:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT1);
                pm_runtime_put(&vc4->v3d->pdev->dev);
                break;
        case DRM_VC4_PARAM_V3D_IDENT2:
                ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-               if (ret)
+               if (ret < 0)
                        return ret;
                args->value = V3D_READ(V3D_IDENT2);
                pm_runtime_put(&vc4->v3d->pdev->dev);
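
Note: pm_runtime_get_sync() returns a negative errno on failure but can also return 1 when the device was already active, so checking `if (ret)` treats a successful resume as an error. The hunk above only fixes the comparison; the commonly used calling pattern looks roughly like this (sketch; read_hw_register() is illustrative):

    #include <linux/pm_runtime.h>

    static int read_ident(struct device *dev, u32 *out)
    {
            int ret = pm_runtime_get_sync(dev);

            if (ret < 0) {
                    pm_runtime_put_noidle(dev); /* count was bumped anyway */
                    return ret;
            }
            *out = read_hw_register();          /* hypothetical */
            pm_runtime_put(dev);
            return 0;
    }
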
index 489e3de..428e249 100644 (file)
@@ -321,6 +321,15 @@ vc4_first_render_job(struct vc4_dev *vc4)
                                struct vc4_exec_info, head);
 }
 
+static inline struct vc4_exec_info *
+vc4_last_render_job(struct vc4_dev *vc4)
+{
+       if (list_empty(&vc4->render_job_list))
+               return NULL;
+       return list_last_entry(&vc4->render_job_list,
+                              struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
index 6155e8a..b262c5c 100644 (file)
@@ -534,8 +534,8 @@ vc4_cl_lookup_bos(struct drm_device *dev,
                return -EINVAL;
        }
 
-       exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
-                          GFP_KERNEL);
+       exec->bo = drm_calloc_large(exec->bo_count,
+                                   sizeof(struct drm_gem_cma_object *));
        if (!exec->bo) {
                DRM_ERROR("Failed to allocate validated BO pointers\n");
                return -ENOMEM;
@@ -572,8 +572,8 @@ vc4_cl_lookup_bos(struct drm_device *dev,
        spin_unlock(&file_priv->table_lock);
 
 fail:
-       kfree(handles);
-       return 0;
+       drm_free_large(handles);
+       return ret;
 }
 
 static int
@@ -608,7 +608,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
         * read the contents back for validation, and I think the
         * bo->vaddr is uncached access.
         */
-       temp = kmalloc(temp_size, GFP_KERNEL);
+       temp = drm_malloc_ab(temp_size, 1);
        if (!temp) {
                DRM_ERROR("Failed to allocate storage for copying "
                          "in bin/render CLs.\n");
@@ -675,7 +675,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
        ret = vc4_validate_shader_recs(dev, exec);
 
 fail:
-       kfree(temp);
+       drm_free_large(temp);
        return ret;
 }
 
@@ -688,7 +688,7 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
        if (exec->bo) {
                for (i = 0; i < exec->bo_count; i++)
                        drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
-               kfree(exec->bo);
+               drm_free_large(exec->bo);
        }
 
        while (!list_empty(&exec->unref_list)) {
@@ -942,8 +942,8 @@ vc4_gem_destroy(struct drm_device *dev)
                vc4->overflow_mem = NULL;
        }
 
-       vc4_bo_cache_destroy(dev);
-
        if (vc4->hang_state)
                vc4_free_hang_state(dev, vc4->hang_state);
+
+       vc4_bo_cache_destroy(dev);
 }
index b0104a3..094bc6a 100644 (file)
@@ -83,8 +83,10 @@ vc4_overflow_mem_work(struct work_struct *work)
 
                spin_lock_irqsave(&vc4->job_lock, irqflags);
                current_exec = vc4_first_bin_job(vc4);
+               if (!current_exec)
+                       current_exec = vc4_last_render_job(vc4);
                if (current_exec) {
-                       vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+                       vc4->overflow_mem->seqno = current_exec->seqno;
                        list_add_tail(&vc4->overflow_mem->unref_head,
                                      &current_exec->unref_list);
                        vc4->overflow_mem = NULL;
index 52a6fd2..e00809d 100644 (file)
@@ -242,20 +242,6 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device)
        dev->pads = args.args[0];
        dev->device = device;
 
-       mutex_lock(&dev->mipi->lock);
-
-       if (dev->mipi->usage_count++ == 0) {
-               err = tegra_mipi_power_up(dev->mipi);
-               if (err < 0) {
-                       dev_err(dev->mipi->dev,
-                               "failed to power up MIPI bricks: %d\n",
-                               err);
-                       return ERR_PTR(err);
-               }
-       }
-
-       mutex_unlock(&dev->mipi->lock);
-
        return dev;
 
 put:
@@ -270,29 +256,42 @@ EXPORT_SYMBOL(tegra_mipi_request);
 
 void tegra_mipi_free(struct tegra_mipi_device *device)
 {
-       int err;
+       platform_device_put(device->pdev);
+       kfree(device);
+}
+EXPORT_SYMBOL(tegra_mipi_free);
 
-       mutex_lock(&device->mipi->lock);
+int tegra_mipi_enable(struct tegra_mipi_device *dev)
+{
+       int err = 0;
 
-       if (--device->mipi->usage_count == 0) {
-               err = tegra_mipi_power_down(device->mipi);
-               if (err < 0) {
-                       /*
-                        * Not much that can be done here, so an error message
-                        * will have to do.
-                        */
-                       dev_err(device->mipi->dev,
-                               "failed to power down MIPI bricks: %d\n",
-                               err);
-               }
-       }
+       mutex_lock(&dev->mipi->lock);
 
-       mutex_unlock(&device->mipi->lock);
+       if (dev->mipi->usage_count++ == 0)
+               err = tegra_mipi_power_up(dev->mipi);
+
+       mutex_unlock(&dev->mipi->lock);
+
+       return err;
 
-       platform_device_put(device->pdev);
-       kfree(device);
 }
-EXPORT_SYMBOL(tegra_mipi_free);
+EXPORT_SYMBOL(tegra_mipi_enable);
+
+int tegra_mipi_disable(struct tegra_mipi_device *dev)
+{
+       int err = 0;
+
+       mutex_lock(&dev->mipi->lock);
+
+       if (--dev->mipi->usage_count == 0)
+               err = tegra_mipi_power_down(dev->mipi);
+
+       mutex_unlock(&dev->mipi->lock);
+
+       return err;
+}
+EXPORT_SYMBOL(tegra_mipi_disable);
 
 static int tegra_mipi_wait(struct tegra_mipi *mipi)
 {
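
Note: moving the brick power-up out of tegra_mipi_request() into tegra_mipi_enable()/tegra_mipi_disable() lets the DSI driver bracket actual use (including its slave link), while a usage count under the mutex guarantees the first enable powers up and the last disable powers down. The generic shape of such a refcounted gate (sketch; shared_block, power_up() and power_down() are illustrative):

    #include <linux/mutex.h>

    struct shared_block {
            struct mutex lock;
            unsigned int usage_count;
    };

    static int shared_enable(struct shared_block *b)
    {
            int err = 0;

            mutex_lock(&b->lock);
            if (b->usage_count++ == 0)      /* first user powers up */
                    err = power_up(b);
            mutex_unlock(&b->lock);

            return err;
    }

    static int shared_disable(struct shared_block *b)
    {
            int err = 0;

            mutex_lock(&b->lock);
            if (--b->usage_count == 0)      /* last user powers down */
                    err = power_down(b);
            mutex_unlock(&b->lock);

            return err;
    }
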
index 730d840..4667012 100644 (file)
@@ -491,7 +491,7 @@ struct it87_sio_data {
 struct it87_data {
        const struct attribute_group *groups[7];
        enum chips type;
-       u16 features;
+       u32 features;
        u8 peci_mask;
        u8 old_peci_mask;
 
@@ -2015,6 +2015,7 @@ static struct attribute *it87_attributes_in[] = {
        &sensor_dev_attr_in10_input.dev_attr.attr,      /* 41 */
        &sensor_dev_attr_in11_input.dev_attr.attr,      /* 41 */
        &sensor_dev_attr_in12_input.dev_attr.attr,      /* 41 */
+       NULL
 };
 
 static const struct attribute_group it87_group_in = {
index f233726..1bb97f6 100644 (file)
@@ -38,6 +38,7 @@
 #define AT91_I2C_TIMEOUT       msecs_to_jiffies(100)   /* transfer timeout */
 #define AT91_I2C_DMA_THRESHOLD 8                       /* enable DMA if transfer size is bigger than this threshold */
 #define AUTOSUSPEND_TIMEOUT            2000
+#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE 256
 
 /* AT91 TWI register definitions */
 #define        AT91_TWI_CR             0x0000  /* Control Register */
@@ -141,6 +142,7 @@ struct at91_twi_dev {
        unsigned twi_cwgr_reg;
        struct at91_twi_pdata *pdata;
        bool use_dma;
+       bool use_alt_cmd;
        bool recv_len_abort;
        u32 fifo_size;
        struct at91_twi_dma dma;
@@ -269,7 +271,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
 
        /* send stop when last byte has been written */
        if (--dev->buf_len == 0)
-               if (!dev->pdata->has_alt_cmd)
+               if (!dev->use_alt_cmd)
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
        dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -292,7 +294,7 @@ static void at91_twi_write_data_dma_callback(void *data)
         * we just have to enable TXCOMP one.
         */
        at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
-       if (!dev->pdata->has_alt_cmd)
+       if (!dev->use_alt_cmd)
                at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 }
 
@@ -410,7 +412,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
        }
 
        /* send stop if second but last byte has been read */
-       if (!dev->pdata->has_alt_cmd && dev->buf_len == 1)
+       if (!dev->use_alt_cmd && dev->buf_len == 1)
                at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
        dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -426,7 +428,7 @@ static void at91_twi_read_data_dma_callback(void *data)
        dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
                         dev->buf_len, DMA_FROM_DEVICE);
 
-       if (!dev->pdata->has_alt_cmd) {
+       if (!dev->use_alt_cmd) {
                /* The last two bytes have to be read without using dma */
                dev->buf += dev->buf_len - 2;
                dev->buf_len = 2;
@@ -443,7 +445,7 @@ static void at91_twi_read_data_dma(struct at91_twi_dev *dev)
        struct dma_chan *chan_rx = dma->chan_rx;
        size_t buf_len;
 
-       buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
+       buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
        dma->direction = DMA_FROM_DEVICE;
 
        /* Keep in mind that we won't use dma to read the last two bytes */
@@ -651,7 +653,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
                unsigned start_flags = AT91_TWI_START;
 
                /* if only one byte is to be read, immediately stop transfer */
-               if (!has_alt_cmd && dev->buf_len <= 1 &&
+               if (!dev->use_alt_cmd && dev->buf_len <= 1 &&
                    !(dev->msg->flags & I2C_M_RECV_LEN))
                        start_flags |= AT91_TWI_STOP;
                at91_twi_write(dev, AT91_TWI_CR, start_flags);
@@ -745,7 +747,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
        int ret;
        unsigned int_addr_flag = 0;
        struct i2c_msg *m_start = msg;
-       bool is_read, use_alt_cmd = false;
+       bool is_read;
 
        dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num);
 
@@ -768,14 +770,16 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
                at91_twi_write(dev, AT91_TWI_IADR, internal_address);
        }
 
+       dev->use_alt_cmd = false;
        is_read = (m_start->flags & I2C_M_RD);
        if (dev->pdata->has_alt_cmd) {
-               if (m_start->len > 0) {
+               if (m_start->len > 0 &&
+                   m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) {
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN);
                        at91_twi_write(dev, AT91_TWI_ACR,
                                       AT91_TWI_ACR_DATAL(m_start->len) |
                                       ((is_read) ? AT91_TWI_ACR_DIR : 0));
-                       use_alt_cmd = true;
+                       dev->use_alt_cmd = true;
                } else {
                        at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS);
                }
@@ -784,7 +788,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
        at91_twi_write(dev, AT91_TWI_MMR,
                       (m_start->addr << 16) |
                       int_addr_flag |
-                      ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
+                      ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
 
        dev->buf_len = m_start->len;
        dev->buf = m_start->buf;
index 19c8438..95f7cac 100644 (file)
@@ -158,7 +158,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data)
 
        if (status & BIT(IS_M_START_BUSY_SHIFT)) {
                iproc_i2c->xfer_is_done = 1;
-               complete_all(&iproc_i2c->done);
+               complete(&iproc_i2c->done);
        }
 
        writel(status, iproc_i2c->base + IS_OFFSET);
index ac9f476..258cb9a 100644 (file)
@@ -229,7 +229,7 @@ static irqreturn_t bcm_kona_i2c_isr(int irq, void *devid)
                       dev->base + TXFCR_OFFSET);
 
        writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET);
-       complete_all(&dev->done);
+       complete(&dev->done);
 
        return IRQ_HANDLED;
 }
@@ -643,7 +643,7 @@ static int bcm_kona_i2c_xfer(struct i2c_adapter *adapter,
                        if (rc < 0) {
                                dev_err(dev->device,
                                        "restart cmd failed rc = %d\n", rc);
-                                       goto xfer_send_stop;
+                               goto xfer_send_stop;
                        }
                }
 
index 3f5a4d7..385b57b 100644 (file)
@@ -228,7 +228,7 @@ static irqreturn_t brcmstb_i2c_isr(int irq, void *devid)
                return IRQ_NONE;
 
        brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE);
-       complete_all(&dev->done);
+       complete(&dev->done);
 
        dev_dbg(dev->device, "isr handled");
        return IRQ_HANDLED;
index 90bbd9f..3c16a2f 100644 (file)
@@ -767,7 +767,7 @@ static int cdns_i2c_setclk(unsigned long clk_in, struct cdns_i2c *id)
  * depending on the scaling direction.
  *
  * Return:     NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK
- *             to acknowedge the change, NOTIFY_DONE if the notification is
+ *             to acknowledge the change, NOTIFY_DONE if the notification is
  *             considered irrelevant.
  */
 static int cdns_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
index a0d95ff..2d5ff86 100644 (file)
@@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
        msg->outsize = request_len;
        msg->insize = response_len;
 
-       result = cros_ec_cmd_xfer(bus->ec, msg);
+       result = cros_ec_cmd_xfer_status(bus->ec, msg);
        if (result < 0) {
                dev_err(dev, "Error transferring EC i2c message %d\n", result);
                goto exit;
index c6922b8..fcd973d 100644 (file)
@@ -367,13 +367,17 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
        dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
 
        /* Configure SDA Hold Time if required */
-       if (dev->sda_hold_time) {
-               reg = dw_readl(dev, DW_IC_COMP_VERSION);
-               if (reg >= DW_IC_SDA_HOLD_MIN_VERS)
+       reg = dw_readl(dev, DW_IC_COMP_VERSION);
+       if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
+               if (dev->sda_hold_time) {
                        dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-               else
-                       dev_warn(dev->dev,
-                               "Hardware too old to adjust SDA hold time.");
+               } else {
+                       /* Keep the previous hold time if none was configured */
+                       dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
+               }
+       } else {
+               dev_warn(dev->dev,
+                       "Hardware too old to adjust SDA hold time.\n");
        }
 
        /* Configure Tx/Rx FIFO threshold levels */
index 71d3929..76e2898 100644 (file)
@@ -211,7 +211,7 @@ static void meson_i2c_stop(struct meson_i2c *i2c)
                meson_i2c_add_token(i2c, TOKEN_STOP);
        } else {
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
        }
 }
 
@@ -238,7 +238,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                dev_dbg(i2c->dev, "error bit set\n");
                i2c->error = -ENXIO;
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                goto out;
        }
 
@@ -269,7 +269,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                break;
        case STATE_STOP:
                i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                break;
        case STATE_IDLE:
                break;
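
Note: the complete_all() -> complete() conversions in the bcm-iproc, bcm-kona, brcmstb and meson hunks all fix the same pattern: complete_all() bumps the completion's done counter to "infinity", so a per-transfer completion that gets reused makes every later wait return immediately with stale state unless reinit_completion() runs in between. complete() wakes exactly one waiter and keeps the counter balanced. Typical per-transfer usage (sketch; start_transfer() is illustrative):

    /* transfer path */
    reinit_completion(&dev->done);          /* arm for this transfer */
    start_transfer(dev);
    if (!wait_for_completion_timeout(&dev->done, HZ))
            return -ETIMEDOUT;

    /* interrupt handler */
    complete(&dev->done);                   /* wake the single waiter */
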
index dfa7a4b..ac88a52 100644 (file)
@@ -379,6 +379,7 @@ static int ocores_i2c_of_probe(struct platform_device *pdev,
                        if (!clock_frequency_present) {
                                dev_err(&pdev->dev,
                                        "Missing required parameter 'opencores,ip-clock-frequency'\n");
+                               clk_disable_unprepare(i2c->clk);
                                return -ENODEV;
                        }
                        i2c->ip_clock_khz = clock_frequency / 1000;
@@ -467,20 +468,21 @@ static int ocores_i2c_probe(struct platform_device *pdev)
                default:
                        dev_err(&pdev->dev, "Unsupported I/O width (%d)\n",
                                i2c->reg_io_width);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err_clk;
                }
        }
 
        ret = ocores_init(&pdev->dev, i2c);
        if (ret)
-               return ret;
+               goto err_clk;
 
        init_waitqueue_head(&i2c->wait);
        ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
                               pdev->name, i2c);
        if (ret) {
                dev_err(&pdev->dev, "Cannot claim IRQ\n");
-               return ret;
+               goto err_clk;
        }
 
        /* hook up driver to tree */
@@ -494,7 +496,7 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        ret = i2c_add_adapter(&i2c->adap);
        if (ret) {
                dev_err(&pdev->dev, "Failed to add adapter\n");
-               return ret;
+               goto err_clk;
        }
 
        /* add in known devices to the bus */
@@ -504,6 +506,10 @@ static int ocores_i2c_probe(struct platform_device *pdev)
        }
 
        return 0;
+
+err_clk:
+       clk_disable_unprepare(i2c->clk);
+       return ret;
 }
 
 static int ocores_i2c_remove(struct platform_device *pdev)
index 52407f3..9bd849d 100644 (file)
@@ -378,7 +378,7 @@ static void rcar_i2c_dma(struct rcar_i2c_priv *priv)
        }
 
        dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
-       if (dma_mapping_error(dev, dma_addr)) {
+       if (dma_mapping_error(chan->device->dev, dma_addr)) {
                dev_dbg(dev, "dma map failed, using PIO\n");
                return;
        }
index 2bc8b01..5c5b7ca 100644 (file)
@@ -918,7 +918,7 @@ static void rk3x_i2c_adapt_div(struct rk3x_i2c *i2c, unsigned long clk_rate)
  * Code adapted from i2c-cadence.c.
  *
  * Return:     NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK
- *             to acknowedge the change, NOTIFY_DONE if the notification is
+ *             to acknowledge the change, NOTIFY_DONE if the notification is
  *             considered irrelevant.
  */
 static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
@@ -1111,6 +1111,15 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap,
        return ret < 0 ? ret : num;
 }
 
+static __maybe_unused int rk3x_i2c_resume(struct device *dev)
+{
+       struct rk3x_i2c *i2c = dev_get_drvdata(dev);
+
+       rk3x_i2c_adapt_div(i2c, clk_get_rate(i2c->clk));
+
+       return 0;
+}
+
 static u32 rk3x_i2c_func(struct i2c_adapter *adap)
 {
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING;
@@ -1334,12 +1343,15 @@ static int rk3x_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
+static SIMPLE_DEV_PM_OPS(rk3x_i2c_pm_ops, NULL, rk3x_i2c_resume);
+
 static struct platform_driver rk3x_i2c_driver = {
        .probe   = rk3x_i2c_probe,
        .remove  = rk3x_i2c_remove,
        .driver  = {
                .name  = "rk3x-i2c",
                .of_match_table = rk3x_i2c_match,
+               .pm = &rk3x_i2c_pm_ops,
        },
 };
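
Note: the divider programmed by rk3x_i2c_adapt_div() does not survive suspend (and the clock rate may have changed), so a resume hook re-derives it. SIMPLE_DEV_PM_OPS() builds the struct dev_pm_ops for system sleep; passing NULL for suspend wires only the resume callback, and __maybe_unused silences the unused-function warning when CONFIG_PM_SLEEP is disabled. Shape of the pattern (sketch; the foo_* names are illustrative):

    static __maybe_unused int foo_resume(struct device *dev)
    {
            struct foo *priv = dev_get_drvdata(dev);

            foo_reprogram_divider(priv);    /* hypothetical re-init */
            return 0;
    }

    static SIMPLE_DEV_PM_OPS(foo_pm_ops, NULL, foo_resume);

    static struct platform_driver foo_driver = {
            .driver = {
                    .name = "foo",
                    .pm   = &foo_pm_ops,
            },
    };
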
 
index 6fb3e26..05b1eea 100644 (file)
@@ -610,7 +610,7 @@ static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd)
                return;
 
        dma_addr = dma_map_single(chan->device->dev, pd->msg->buf, pd->msg->len, dir);
-       if (dma_mapping_error(pd->dev, dma_addr)) {
+       if (dma_mapping_error(chan->device->dev, dma_addr)) {
                dev_dbg(pd->dev, "dma map failed, using PIO\n");
                return;
        }
index 8de073a..b3893f6 100644 (file)
@@ -37,8 +37,6 @@ struct i2c_demux_pinctrl_priv {
        struct i2c_demux_pinctrl_chan chan[];
 };
 
-static struct property status_okay = { .name = "status", .length = 3, .value = "ok" };
-
 static int i2c_demux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 {
        struct i2c_demux_pinctrl_priv *priv = adap->algo_data;
@@ -68,7 +66,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
        adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
        if (!adap) {
                ret = -ENODEV;
-               goto err;
+               goto err_with_revert;
        }
 
        p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
@@ -103,8 +101,11 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
 
  err_with_put:
        i2c_put_adapter(adap);
+ err_with_revert:
+       of_changeset_revert(&priv->chan[new_chan].chgset);
  err:
        dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
+       priv->cur_chan = -EINVAL;
        return ret;
 }
 
@@ -190,6 +191,7 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct i2c_demux_pinctrl_priv *priv;
+       struct property *props;
        int num_chan, i, j, err;
 
        num_chan = of_count_phandle_with_args(np, "i2c-parent", NULL);
@@ -200,7 +202,10 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
 
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv)
                           + num_chan * sizeof(struct i2c_demux_pinctrl_chan), GFP_KERNEL);
-       if (!priv)
+
+       props = devm_kcalloc(&pdev->dev, num_chan, sizeof(*props), GFP_KERNEL);
+
+       if (!priv || !props)
                return -ENOMEM;
 
        err = of_property_read_string(np, "i2c-bus-name", &priv->bus_name);
@@ -218,8 +223,12 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
                }
                priv->chan[i].parent_np = adap_np;
 
+               props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL);
+               props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL);
+               props[i].length = 3;
+
                of_changeset_init(&priv->chan[i].chgset);
-               of_changeset_update_property(&priv->chan[i].chgset, adap_np, &status_okay);
+               of_changeset_update_property(&priv->chan[i].chgset, adap_np, &props[i]);
        }
 
        priv->num_chan = num_chan;
index 89d7820..78f148e 100644 (file)
@@ -20,6 +20,8 @@ config BMA180
 config BMA220
     tristate "Bosch BMA220 3-Axis Accelerometer Driver"
        depends on SPI
+       select IIO_BUFFER
+       select IIO_TRIGGERED_BUFFER
     help
       Say yes here to add support for the Bosch BMA220 triaxial
       acceleration sensor.
@@ -234,7 +236,8 @@ config STK8312
 config STK8BA50
        tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
        depends on I2C
-       depends on IIO_TRIGGER
+       select IIO_BUFFER
+       select IIO_TRIGGERED_BUFFER
        help
          Say yes here to get support for the Sensortek STK8BA50 3-axis
          accelerometer.
index 1098d10..5099f29 100644 (file)
@@ -253,7 +253,7 @@ static int bma220_probe(struct spi_device *spi)
        if (ret < 0)
                return ret;
 
-       ret = iio_triggered_buffer_setup(indio_dev, NULL,
+       ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
                                         bma220_trigger_handler, NULL);
        if (ret < 0) {
                dev_err(&spi->dev, "iio triggered buffer setup failed\n");
index bf17aae..59b380d 100644 (file)
@@ -67,6 +67,9 @@
 #define BMC150_ACCEL_REG_PMU_BW                0x10
 #define BMC150_ACCEL_DEF_BW                    125
 
+#define BMC150_ACCEL_REG_RESET                 0x14
+#define BMC150_ACCEL_RESET_VAL                 0xB6
+
 #define BMC150_ACCEL_REG_INT_MAP_0             0x19
 #define BMC150_ACCEL_INT_MAP_0_BIT_SLOPE       BIT(2)
 
@@ -1497,6 +1500,14 @@ static int bmc150_accel_chip_init(struct bmc150_accel_data *data)
        int ret, i;
        unsigned int val;
 
+       /*
+        * Reset chip to get it in a known good state. A delay of 1.8ms after
+        * reset is required according to the data sheets of supported chips.
+        */
+       regmap_write(data->regmap, BMC150_ACCEL_REG_RESET,
+                    BMC150_ACCEL_RESET_VAL);
+       usleep_range(1800, 2500);
+
        ret = regmap_read(data->regmap, BMC150_ACCEL_REG_CHIP_ID, &val);
        if (ret < 0) {
                dev_err(dev, "Error: Reading chip id\n");
index 3a9f106..9d72d4b 100644 (file)
@@ -160,11 +160,13 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
                if (ret < 0)
                        goto error_ret;
                *val = ret;
+               ret = IIO_VAL_INT;
                break;
        case IIO_CHAN_INFO_SCALE:
                ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
                if (ret < 0)
                        goto error_ret;
+               *val = 0;
                *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
                ret = IIO_VAL_INT_PLUS_MICRO;
                break;
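
Note: both kxsd9 fixes are about the read_raw() contract: the callback must fill *val (and *val2 where applicable) and return an IIO_VAL_* code telling the core how to format the pair. The raw case fell through with a stale return value, and the scale case left *val uninitialized for IIO_VAL_INT_PLUS_MICRO. Convention sketch (read_hw_counts() and the scale value are illustrative):

    static int foo_read_raw(struct iio_dev *indio_dev,
                            struct iio_chan_spec const *chan,
                            int *val, int *val2, long mask)
    {
            switch (mask) {
            case IIO_CHAN_INFO_RAW:
                    *val = read_hw_counts();        /* formatted as "%d" */
                    return IIO_VAL_INT;
            case IIO_CHAN_INFO_SCALE:
                    *val = 0;                       /* integer part */
                    *val2 = 11978;                  /* micro part -> "0.011978" */
                    return IIO_VAL_INT_PLUS_MICRO;
            default:
                    return -EINVAL;
            }
    }
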
index 1de31bd..7675772 100644 (file)
@@ -389,6 +389,7 @@ config QCOM_SPMI_VADC
 config ROCKCHIP_SARADC
        tristate "Rockchip SARADC driver"
        depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST)
+       depends on RESET_CONTROLLER
        help
          Say yes here to build support for the SARADC found in SoCs from
          Rockchip.
index b616376..9704090 100644 (file)
@@ -527,6 +527,7 @@ static struct attribute_group ad799x_event_attrs_group = {
 static const struct iio_info ad7991_info = {
        .read_raw = &ad799x_read_raw,
        .driver_module = THIS_MODULE,
+       .update_scan_mode = ad799x_update_scan_mode,
 };
 
 static const struct iio_info ad7993_4_7_8_noirq_info = {
index 52430ba..0438c68 100644 (file)
@@ -381,8 +381,8 @@ static irqreturn_t at91_adc_rl_interrupt(int irq, void *private)
                st->ts_bufferedmeasure = false;
                input_report_key(st->ts_input, BTN_TOUCH, 0);
                input_sync(st->ts_input);
-       } else if (status & AT91_ADC_EOC(3)) {
-               /* Conversion finished */
+       } else if (status & AT91_ADC_EOC(3) && st->ts_input) {
+               /* Conversion finished and we have a touchscreen */
                if (st->ts_bufferedmeasure) {
                        /*
                         * Last measurement is always discarded, since it can
index f9ad6c2..85d7012 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/reset.h>
 #include <linux/regulator/consumer.h>
 #include <linux/iio/iio.h>
 
@@ -53,6 +55,7 @@ struct rockchip_saradc {
        struct clk              *clk;
        struct completion       completion;
        struct regulator        *vref;
+       struct reset_control    *reset;
        const struct rockchip_saradc_data *data;
        u16                     last_val;
 };
@@ -190,6 +193,16 @@ static const struct of_device_id rockchip_saradc_match[] = {
 };
 MODULE_DEVICE_TABLE(of, rockchip_saradc_match);
 
+/*
+ * Reset the SARADC controller.
+ */
+static void rockchip_saradc_reset_controller(struct reset_control *reset)
+{
+       reset_control_assert(reset);
+       usleep_range(10, 20);
+       reset_control_deassert(reset);
+}
+
 static int rockchip_saradc_probe(struct platform_device *pdev)
 {
        struct rockchip_saradc *info = NULL;
@@ -218,6 +231,20 @@ static int rockchip_saradc_probe(struct platform_device *pdev)
        if (IS_ERR(info->regs))
                return PTR_ERR(info->regs);
 
+       /*
+        * The reset is an optional property, so that the driver keeps
+        * working with old devicetrees as well
+        */
+       info->reset = devm_reset_control_get(&pdev->dev, "saradc-apb");
+       if (IS_ERR(info->reset)) {
+               ret = PTR_ERR(info->reset);
+               if (ret != -ENOENT)
+                       return ret;
+
+               dev_dbg(&pdev->dev, "no reset control found\n");
+               info->reset = NULL;
+       }
+
        init_completion(&info->completion);
 
        irq = platform_get_irq(pdev, 0);
@@ -252,6 +279,9 @@ static int rockchip_saradc_probe(struct platform_device *pdev)
                return PTR_ERR(info->vref);
        }
 
+       if (info->reset)
+               rockchip_saradc_reset_controller(info->reset);
+
        /*
         * Use a default value for the converter clock.
         * This may become user-configurable in the future.
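
Note: devm_reset_control_get() returns -ENOENT when the device tree has no matching resets phandle, so treating exactly that error as "no reset, carry on" keeps old devicetrees working while still propagating real failures such as -EPROBE_DEFER. (Later kernels grew devm_reset_control_get_optional*() for this case.) Sketch:

    rst = devm_reset_control_get(dev, "saradc-apb");
    if (IS_ERR(rst)) {
            if (PTR_ERR(rst) != -ENOENT)
                    return PTR_ERR(rst);    /* real error, incl. -EPROBE_DEFER */
            rst = NULL;                     /* property absent: run without it */
    }
    if (rst) {
            reset_control_assert(rst);
            usleep_range(10, 20);           /* let the reset take effect */
            reset_control_deassert(rst);
    }
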
index 1ef3987..066abaf 100644 (file)
@@ -489,7 +489,8 @@ static struct iio_info ads1115_info = {
 #ifdef CONFIG_OF
 static int ads1015_get_channels_config_of(struct i2c_client *client)
 {
-       struct ads1015_data *data = i2c_get_clientdata(client);
+       struct iio_dev *indio_dev = i2c_get_clientdata(client);
+       struct ads1015_data *data = iio_priv(indio_dev);
        struct device_node *node;
 
        if (!client->dev.of_node ||
index 8a36875..c3cfacc 100644 (file)
@@ -32,6 +32,7 @@
 
 struct tiadc_device {
        struct ti_tscadc_dev *mfd_tscadc;
+       struct mutex fifo1_lock; /* to protect fifo access */
        int channels;
        u8 channel_line[8];
        u8 channel_step[8];
@@ -359,6 +360,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
                int *val, int *val2, long mask)
 {
        struct tiadc_device *adc_dev = iio_priv(indio_dev);
+       int ret = IIO_VAL_INT;
        int i, map_val;
        unsigned int fifo1count, read, stepid;
        bool found = false;
@@ -372,13 +374,14 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
        if (!step_en)
                return -EINVAL;
 
+       mutex_lock(&adc_dev->fifo1_lock);
        fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
        while (fifo1count--)
                tiadc_readl(adc_dev, REG_FIFO1);
 
        am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en);
 
-       timeout = jiffies + usecs_to_jiffies
+       timeout = jiffies + msecs_to_jiffies
                                (IDLE_TIMEOUT * adc_dev->channels);
        /* Wait for Fifo threshold interrupt */
        while (1) {
@@ -388,7 +391,8 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 
                if (time_after(jiffies, timeout)) {
                        am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
-                       return -EAGAIN;
+                       ret = -EAGAIN;
+                       goto err_unlock;
                }
        }
        map_val = adc_dev->channel_step[chan->scan_index];
@@ -414,8 +418,11 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
        am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
 
        if (found == false)
-               return -EBUSY;
-       return IIO_VAL_INT;
+               ret = -EBUSY;
+
+err_unlock:
+       mutex_unlock(&adc_dev->fifo1_lock);
+       return ret;
 }
 
 static const struct iio_info tiadc_info = {
@@ -483,6 +490,7 @@ static int tiadc_probe(struct platform_device *pdev)
 
        tiadc_step_config(indio_dev);
        tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD);
+       mutex_init(&adc_dev->fifo1_lock);
 
        err = tiadc_channel_init(indio_dev, adc_dev->channels);
        if (err < 0)
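
Note: IDLE_TIMEOUT is a per-channel figure in milliseconds, so usecs_to_jiffies() made the FIFO threshold wait roughly a thousand times too short; the only thing masking it is that sub-jiffy intervals round up to one tick. Taking IDLE_TIMEOUT == 100 and eight channels for illustration, at HZ=250 (4 ms per jiffy):

    usecs_to_jiffies(100 * 8) = usecs_to_jiffies(800)  ->   1 jiffy   (~4 ms)
    msecs_to_jiffies(100 * 8) = msecs_to_jiffies(800)  -> 200 jiffies (800 ms)
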
index ae038a5..407f141 100644 (file)
@@ -434,7 +434,7 @@ static int atlas_read_raw(struct iio_dev *indio_dev,
                        break;
                case IIO_ELECTRICALCONDUCTIVITY:
                        *val = 1; /* 0.00001 */
-                       *val = 100000;
+                       *val2 = 100000;
                        break;
                case IIO_CONCENTRATION:
                        *val = 0; /* 0.000000001 */
index e81f434..dc33c1d 100644 (file)
@@ -56,8 +56,8 @@ static struct {
        {HID_USAGE_SENSOR_ALS, 0, 1, 0},
        {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
 
-       {HID_USAGE_SENSOR_PRESSURE, 0, 100000, 0},
-       {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 1, 0},
+       {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0},
+       {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000},
 };
 
 static int pow_10(unsigned power)
index 792a971..bebbd00 100644 (file)
@@ -65,6 +65,16 @@ struct stx104_gpio {
        unsigned int out_state;
 };
 
+/**
+ * struct stx104_dev - STX104 device private data structure
+ * @indio_dev: IIO device
+ * @chip:      instance of the gpio_chip
+ */
+struct stx104_dev {
+       struct iio_dev *indio_dev;
+       struct gpio_chip *chip;
+};
+
 static int stx104_read_raw(struct iio_dev *indio_dev,
        struct iio_chan_spec const *chan, int *val, int *val2, long mask)
 {
@@ -107,6 +117,7 @@ static const struct iio_chan_spec stx104_channels[STX104_NUM_CHAN] = {
 static int stx104_gpio_get_direction(struct gpio_chip *chip,
        unsigned int offset)
 {
+       /* GPIO 0-3 are input only, while the rest are output only */
        if (offset < 4)
                return 1;
 
@@ -169,6 +180,7 @@ static int stx104_probe(struct device *dev, unsigned int id)
        struct iio_dev *indio_dev;
        struct stx104_iio *priv;
        struct stx104_gpio *stx104gpio;
+       struct stx104_dev *stx104dev;
        int err;
 
        indio_dev = devm_iio_device_alloc(dev, sizeof(*priv));
@@ -179,6 +191,10 @@ static int stx104_probe(struct device *dev, unsigned int id)
        if (!stx104gpio)
                return -ENOMEM;
 
+       stx104dev = devm_kzalloc(dev, sizeof(*stx104dev), GFP_KERNEL);
+       if (!stx104dev)
+               return -ENOMEM;
+
        if (!devm_request_region(dev, base[id], STX104_EXTENT,
                dev_name(dev))) {
                dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
@@ -199,12 +215,6 @@ static int stx104_probe(struct device *dev, unsigned int id)
        outw(0, base[id] + 4);
        outw(0, base[id] + 6);
 
-       err = devm_iio_device_register(dev, indio_dev);
-       if (err) {
-               dev_err(dev, "IIO device registering failed (%d)\n", err);
-               return err;
-       }
-
        stx104gpio->chip.label = dev_name(dev);
        stx104gpio->chip.parent = dev;
        stx104gpio->chip.owner = THIS_MODULE;
@@ -220,7 +230,9 @@ static int stx104_probe(struct device *dev, unsigned int id)
 
        spin_lock_init(&stx104gpio->lock);
 
-       dev_set_drvdata(dev, stx104gpio);
+       stx104dev->indio_dev = indio_dev;
+       stx104dev->chip = &stx104gpio->chip;
+       dev_set_drvdata(dev, stx104dev);
 
        err = gpiochip_add_data(&stx104gpio->chip, stx104gpio);
        if (err) {
@@ -228,14 +240,22 @@ static int stx104_probe(struct device *dev, unsigned int id)
                return err;
        }
 
+       err = iio_device_register(indio_dev);
+       if (err) {
+               dev_err(dev, "IIO device registering failed (%d)\n", err);
+               gpiochip_remove(&stx104gpio->chip);
+               return err;
+       }
+
        return 0;
 }
 
 static int stx104_remove(struct device *dev, unsigned int id)
 {
-       struct stx104_gpio *const stx104gpio = dev_get_drvdata(dev);
+       struct stx104_dev *const stx104dev = dev_get_drvdata(dev);
 
-       gpiochip_remove(&stx104gpio->chip);
+       iio_device_unregister(stx104dev->indio_dev);
+       gpiochip_remove(stx104dev->chip);
 
        return 0;
 }
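
Note: mixing devm-managed registration with manually torn-down resources gets the teardown order wrong: the devm unregistration of the IIO device only runs after remove() has returned, so the IIO interface stays exposed while the rest of the device state is being dismantled. Registering manually lets probe() unwind on error and remove() mirror probe() in reverse. Sketch of the pairing:

    /* probe(): acquire in order A then B */
    err = gpiochip_add_data(&priv->chip, priv);     /* A */
    if (err)
            return err;

    err = iio_device_register(indio_dev);           /* B */
    if (err) {
            gpiochip_remove(&priv->chip);           /* unwind A */
            return err;
    }
    return 0;

    /* remove(): release in reverse order, B then A */
    iio_device_unregister(indio_dev);
    gpiochip_remove(&priv->chip);
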
index 738a86d..d041243 100644 (file)
@@ -6,6 +6,8 @@ menu "Humidity sensors"
 config AM2315
     tristate "Aosong AM2315 relative humidity and temperature sensor"
     depends on I2C
+    select IIO_BUFFER
+    select IIO_TRIGGERED_BUFFER
     help
       If you say yes here you get support for the Aosong AM2315
       relative humidity and ambient temperature sensor.
index 3e200f6..ff96b6d 100644 (file)
@@ -244,7 +244,7 @@ static int am2315_probe(struct i2c_client *client,
        indio_dev->channels = am2315_channels;
        indio_dev->num_channels = ARRAY_SIZE(am2315_channels);
 
-       ret = iio_triggered_buffer_setup(indio_dev, NULL,
+       ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
                                         am2315_trigger_handler, NULL);
        if (ret < 0) {
                dev_err(&client->dev, "iio triggered buffer setup failed\n");
index a03832a..e0c9c70 100644 (file)
@@ -142,7 +142,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
        struct i2c_client *client = data->client;
        int delay = data->adc_int_us[chan->address];
        int ret;
-       int val;
+       __be16 val;
 
        /* start measurement */
        ret = i2c_smbus_write_byte(client, chan->address);
@@ -154,26 +154,13 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
        /* wait for integration time to pass */
        usleep_range(delay, delay + 1000);
 
-       /*
-        * i2c_smbus_read_word_data cannot() be used here due to the command
-        * value not being understood and causes NAKs preventing any reading
-        * from being accessed.
-        */
-       ret = i2c_smbus_read_byte(client);
+       /* read measurement */
+       ret = i2c_master_recv(data->client, (char *)&val, sizeof(val));
        if (ret < 0) {
-               dev_err(&client->dev, "cannot read high byte measurement");
+               dev_err(&client->dev, "cannot read sensor data\n");
                return ret;
        }
-       val = ret << 8;
-
-       ret = i2c_smbus_read_byte(client);
-       if (ret < 0) {
-               dev_err(&client->dev, "cannot read low byte measurement");
-               return ret;
-       }
-       val |= ret;
-
-       return val;
+       return be16_to_cpu(val);
 }
 
 static int hdc100x_get_heater_status(struct hdc100x_data *data)
@@ -272,8 +259,8 @@ static int hdc100x_probe(struct i2c_client *client,
        struct iio_dev *indio_dev;
        struct hdc100x_data *data;
 
-       if (!i2c_check_functionality(client->adapter,
-                               I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BYTE))
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA |
+                                    I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C))
                return -EOPNOTSUPP;
 
        indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data));
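
Note: the measurement register is 16 bits transmitted MSB first, and the part NAKs the SMBus read-word protocol, so the fix pulls both bytes with a single i2c_master_recv() into a __be16 and converts once, which is both atomic on the wire and endian-safe. Sketch:

    __be16 raw;
    u16 value;
    int ret;

    ret = i2c_master_recv(client, (char *)&raw, sizeof(raw));
    if (ret < 0)
            return ret;

    value = be16_to_cpu(raw);       /* wire bytes 0x12 0x34 -> 0x1234 */
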
index 90462fc..158aaf4 100644 (file)
@@ -107,9 +107,10 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
 {
        struct iio_dev *indio_dev = filp->private_data;
        struct iio_buffer *rb = indio_dev->buffer;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        size_t datum_size;
        size_t to_wait;
-       int ret;
+       int ret = 0;
 
        if (!indio_dev->info)
                return -ENODEV;
@@ -131,19 +132,29 @@ ssize_t iio_buffer_read_first_n_outer(struct file *filp, char __user *buf,
        else
                to_wait = min_t(size_t, n / datum_size, rb->watermark);
 
+       add_wait_queue(&rb->pollq, &wait);
        do {
-               ret = wait_event_interruptible(rb->pollq,
-                     iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size));
-               if (ret)
-                       return ret;
+               if (!indio_dev->info) {
+                       ret = -ENODEV;
+                       break;
+               }
 
-               if (!indio_dev->info)
-                       return -ENODEV;
+               if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) {
+                       if (signal_pending(current)) {
+                               ret = -ERESTARTSYS;
+                               break;
+                       }
+
+                       wait_woken(&wait, TASK_INTERRUPTIBLE,
+                                  MAX_SCHEDULE_TIMEOUT);
+                       continue;
+               }
 
                ret = rb->access->read_first_n(rb, n, buf);
                if (ret == 0 && (filp->f_flags & O_NONBLOCK))
                        ret = -EAGAIN;
-        } while (ret == 0);
+       } while (ret == 0);
+       remove_wait_queue(&rb->pollq, &wait);
 
        return ret;
 }
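
Note: the open-coded wait replaces wait_event_interruptible() so that each wakeup first re-checks that the device still exists (returning -ENODEV instead of blocking on a gone device) and distinguishes a signal from real data. wait_woken() is what makes the check-then-sleep safe: a wakeup arriving between the condition test and the sleep is remembered and turns the sleep into a no-op. Skeleton of the pattern (condition() is illustrative):

    DEFINE_WAIT_FUNC(wait, woken_wake_function);
    int ret = 0;

    add_wait_queue(&wq, &wait);
    while (!condition()) {
            if (signal_pending(current)) {
                    ret = -ERESTARTSYS;
                    break;
            }
            /* Sleeps unless a wakeup already arrived since the last check. */
            wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
    }
    remove_wait_queue(&wq, &wait);
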
index f914d5d..d2b8899 100644 (file)
@@ -613,9 +613,8 @@ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
                        return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
        case IIO_VAL_FRACTIONAL:
                tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]);
-               vals[1] = do_div(tmp, 1000000000LL);
-               vals[0] = tmp;
-               return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+               vals[0] = (int)div_s64_rem(tmp, 1000000000, &vals[1]);
+               return sprintf(buf, "%d.%09u\n", vals[0], abs(vals[1]));
        case IIO_VAL_FRACTIONAL_LOG2:
                tmp = (s64)vals[0] * 1000000000LL >> vals[1];
                vals[1] = do_div(tmp, 1000000000LL);
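
Note: do_div() is defined only for unsigned 64-bit dividends, so the old IIO_VAL_FRACTIONAL path produced garbage for negative values; div_s64_rem() divides signed and leaves the remainder carrying the dividend's sign, which the "%d.%09u" format then fixes up with abs(). Worked example for -3/2 (vals = {-3, 2}):

    tmp = (-3 * 1000000000LL) / 2               = -1500000000
    div_s64_rem(tmp, 1000000000, &rem)          -> quotient -1, rem -500000000
    printed as "%d.%09u" using abs(rem)         -> "-1.500000000"
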
index 7c566f5..3574945 100644 (file)
@@ -76,7 +76,6 @@ config BH1750
 config BH1780
        tristate "ROHM BH1780 ambient light sensor"
        depends on I2C
-       depends on !SENSORS_BH1780
        help
         Say Y here to build support for the ROHM BH1780GLI ambient
         light sensor.
@@ -238,6 +237,8 @@ config MAX44000
        tristate "MAX44000 Ambient and Infrared Proximity Sensor"
        depends on I2C
        select REGMAP_I2C
+       select IIO_BUFFER
+       select IIO_TRIGGERED_BUFFER
        help
         Say Y here if you want to build support for Maxim Integrated's
         MAX44000 ambient and infrared proximity sensor device.
index 6943688..e5a533c 100644 (file)
@@ -970,7 +970,7 @@ int bmp280_common_probe(struct device *dev,
        data->vdda = devm_regulator_get(dev, "vdda");
        if (IS_ERR(data->vdda)) {
                dev_err(dev, "failed to get VDDA regulator\n");
-               ret = PTR_ERR(data->vddd);
+               ret = PTR_ERR(data->vdda);
                goto out_disable_vddd;
        }
        ret = regulator_enable(data->vdda);
@@ -1079,7 +1079,8 @@ EXPORT_SYMBOL(bmp280_common_remove);
 #ifdef CONFIG_PM
 static int bmp280_runtime_suspend(struct device *dev)
 {
-       struct bmp280_data *data = dev_get_drvdata(dev);
+       struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct bmp280_data *data = iio_priv(indio_dev);
        int ret;
 
        ret = regulator_disable(data->vdda);
@@ -1090,7 +1091,8 @@ static int bmp280_runtime_suspend(struct device *dev)
 
 static int bmp280_runtime_resume(struct device *dev)
 {
-       struct bmp280_data *data = dev_get_drvdata(dev);
+       struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct bmp280_data *data = iio_priv(indio_dev);
        int ret;
 
        ret = regulator_enable(data->vddd);
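
Note: both the ads1015 hunk earlier and these bmp280 PM callbacks hit the same convention: for IIO drivers the drvdata pointer is the struct iio_dev, and the chip state lives behind iio_priv(); casting drvdata straight to the private struct reads unrelated memory. The idiomatic retrieval (sketch; foo_data is illustrative):

    struct iio_dev *indio_dev = dev_get_drvdata(dev);
    struct foo_data *data = iio_priv(indio_dev);    /* per-chip state */
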
index 2e3a70e..5656deb 100644 (file)
@@ -397,7 +397,7 @@ static int as3935_probe(struct spi_device *spi)
                return ret;
        }
 
-       ret = iio_triggered_buffer_setup(indio_dev, NULL,
+       ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
                &as3935_trigger_handler, NULL);
 
        if (ret) {
index e6dfa1b..5f65a78 100644 (file)
@@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
-               if (!ndev)
-                       return -ENODEV;
+               if (!ndev) {
+                       ret = -ENODEV;
+                       goto err2;
+               }
 
                if (ndev->flags & IFF_LOOPBACK) {
                        dev_put(ndev);
-                       if (!id_priv->id.device->get_netdev)
-                               return -EOPNOTSUPP;
+                       if (!id_priv->id.device->get_netdev) {
+                               ret = -EOPNOTSUPP;
+                               goto err2;
+                       }
 
                        ndev = id_priv->id.device->get_netdev(id_priv->id.device,
                                                              id_priv->id.port_num);
-                       if (!ndev)
-                               return -ENODEV;
+                       if (!ndev) {
+                               ret = -ENODEV;
+                               goto err2;
+                       }
                }
 
                route->path_rec->net = &init_net;
index 3a3c5d7..51c79b2 100644 (file)
@@ -106,7 +106,6 @@ struct mcast_group {
        atomic_t                refcount;
        enum mcast_group_state  state;
        struct ib_sa_query      *query;
-       int                     query_id;
        u16                     pkey_index;
        u8                      leave_state;
        int                     retries;
@@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member)
                                       member->multicast.comp_mask,
                                       3000, GFP_KERNEL, join_handler, group,
                                       &group->query);
-       if (ret >= 0) {
-               group->query_id = ret;
-               ret = 0;
-       }
-       return ret;
+       return (ret > 0) ? 0 : ret;
 }
 
 static int send_leave(struct mcast_group *group, u8 leave_state)
@@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
                                       IB_SA_MCMEMBER_REC_JOIN_STATE,
                                       3000, GFP_KERNEL, leave_handler,
                                       group, &group->query);
-       if (ret >= 0) {
-               group->query_id = ret;
-               ret = 0;
-       }
-       return ret;
+       return (ret > 0) ? 0 : ret;
 }
 
 static void join_group(struct mcast_group *group, struct mcast_member *member,
index 23f38cf..afe8b28 100644 (file)
@@ -1,6 +1,7 @@
 config INFINIBAND_CXGB4
        tristate "Chelsio T4/T5 RDMA Driver"
        depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+       select CHELSIO_LIB
        select GENERIC_ALLOCATOR
        ---help---
          This is an iWARP/RDMA driver for the Chelsio T4 and T5
index e11cf72..fa40b68 100644 (file)
@@ -1,4 +1,5 @@
 ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
 
 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
 
index 3aca7f6..3cbbfbe 100644 (file)
@@ -49,6 +49,7 @@
 
 #include <rdma/ib_addr.h>
 
+#include <libcxgb_cm.h>
 #include "iw_cxgb4.h"
 #include "clip_tbl.h"
 
@@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
 
 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 {
-       struct cpl_tid_release *req;
+       u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 
-       skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+       skb = get_skb(skb, len, GFP_KERNEL);
        if (!skb)
                return;
-       req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
-       INIT_TP_WR(req, hwtid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
-       set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+
+       cxgb_mk_tid_release(skb, len, hwtid, 0);
        c4iw_ofld_send(rdev, skb);
        return;
 }
@@ -464,72 +463,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 }
 
-static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
-{
-       int i;
-
-       egress_dev = get_real_dev(egress_dev);
-       for (i = 0; i < dev->rdev.lldi.nports; i++)
-               if (dev->rdev.lldi.ports[i] == egress_dev)
-                       return 1;
-       return 0;
-}
-
-static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
-                                    __u8 *peer_ip, __be16 local_port,
-                                    __be16 peer_port, u8 tos,
-                                    __u32 sin6_scope_id)
-{
-       struct dst_entry *dst = NULL;
-
-       if (IS_ENABLED(CONFIG_IPV6)) {
-               struct flowi6 fl6;
-
-               memset(&fl6, 0, sizeof(fl6));
-               memcpy(&fl6.daddr, peer_ip, 16);
-               memcpy(&fl6.saddr, local_ip, 16);
-               if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
-                       fl6.flowi6_oif = sin6_scope_id;
-               dst = ip6_route_output(&init_net, NULL, &fl6);
-               if (!dst)
-                       goto out;
-               if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
-                   !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
-                       dst_release(dst);
-                       dst = NULL;
-               }
-       }
-
-out:
-       return dst;
-}
-
-static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
-                                __be32 peer_ip, __be16 local_port,
-                                __be16 peer_port, u8 tos)
-{
-       struct rtable *rt;
-       struct flowi4 fl4;
-       struct neighbour *n;
-
-       rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
-                                  peer_port, local_port, IPPROTO_TCP,
-                                  tos, 0);
-       if (IS_ERR(rt))
-               return NULL;
-       n = dst_neigh_lookup(&rt->dst, &peer_ip);
-       if (!n)
-               return NULL;
-       if (!our_interface(dev, n->dev) &&
-           !(n->dev->flags & IFF_LOOPBACK)) {
-               neigh_release(n);
-               dst_release(&rt->dst);
-               return NULL;
-       }
-       neigh_release(n);
-       return &rt->dst;
-}
-
 static void arp_failure_discard(void *handle, struct sk_buff *skb)
 {
        pr_err(MOD "ARP failure\n");
@@ -704,56 +637,32 @@ static int send_flowc(struct c4iw_ep *ep)
 
 static int send_halfclose(struct c4iw_ep *ep)
 {
-       struct cpl_close_con_req *req;
        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
-       int wrlen = roundup(sizeof *req, 16);
+       u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        if (WARN_ON(!skb))
                return -ENOMEM;
 
-       set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
-       t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
-       req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
-       memset(req, 0, wrlen);
-       INIT_TP_WR(req, ep->hwtid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
-                                                   ep->hwtid));
+       cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+                             NULL, arp_failure_discard);
+
        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
 static int send_abort(struct c4iw_ep *ep)
 {
-       struct cpl_abort_req *req;
-       int wrlen = roundup(sizeof *req, 16);
+       u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        if (WARN_ON(!req_skb))
                return -ENOMEM;
 
-       set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
-       t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
-       req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
-       memset(req, 0, wrlen);
-       INIT_TP_WR(req, ep->hwtid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
-       req->cmd = CPL_ABORT_SEND_RST;
-       return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
-}
+       cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+                         ep, abort_arp_failure);
 
-static void best_mtu(const unsigned short *mtus, unsigned short mtu,
-                    unsigned int *idx, int use_ts, int ipv6)
-{
-       unsigned short hdr_size = (ipv6 ?
-                                  sizeof(struct ipv6hdr) :
-                                  sizeof(struct iphdr)) +
-                                 sizeof(struct tcphdr) +
-                                 (use_ts ?
-                                  round_up(TCPOLEN_TIMESTAMP, 4) : 0);
-       unsigned short data_size = mtu - hdr_size;
-
-       cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+       return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 }
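
cxgb_mk_close_con_req() and cxgb_mk_abort_req() are assumed to follow the
same pattern, additionally folding in the tx-queue selection and the
ARP-failure handler that the deleted lines installed by hand. A sketch of
the abort variant, inferred from the removed code:

static inline void cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid,
                                     u16 chan, void *handle,
                                     arp_err_handler_t handler)
{
        struct cpl_abort_req *req;

        set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
        t4_set_arp_err_handler(skb, handle, handler);
        req = (struct cpl_abort_req *)skb_put(skb, len);
        memset(req, 0, len);
        INIT_TP_WR(req, tid);
        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
        req->cmd = CPL_ABORT_SEND_RST;
}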
 
 static int send_connect(struct c4iw_ep *ep)
@@ -768,7 +677,7 @@ static int send_connect(struct c4iw_ep *ep)
        u64 opt0;
        u32 opt2;
        unsigned int mtu_idx;
-       int wscale;
+       u32 wscale;
        int win, sizev4, sizev6, wrlen;
        struct sockaddr_in *la = (struct sockaddr_in *)
                                 &ep->com.local_addr;
@@ -815,10 +724,10 @@ static int send_connect(struct c4iw_ep *ep)
        }
        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 
-       best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-                enable_tcp_timestamps,
-                (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-       wscale = compute_wscale(rcv_win);
+       cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                     enable_tcp_timestamps,
+                     (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+       wscale = cxgb_compute_wscale(rcv_win);
 
        /*
         * Specify the largest window that will fit in opt0. The
@@ -1445,9 +1354,9 @@ static void established_upcall(struct c4iw_ep *ep)
 
 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
 {
-       struct cpl_rx_data_ack *req;
        struct sk_buff *skb;
-       int wrlen = roundup(sizeof *req, 16);
+       u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+       u32 credit_dack;
 
        PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
        skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -1464,15 +1373,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
 
-       req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
-       memset(req, 0, wrlen);
-       INIT_TP_WR(req, ep->hwtid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
-                                                   ep->hwtid));
-       req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
-                                      RX_DACK_CHANGE_F |
-                                      RX_DACK_MODE_V(dack_mode));
-       set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+       credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+                     RX_DACK_MODE_V(dack_mode);
+
+       cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+                           credit_dack);
+
        c4iw_ofld_send(&ep->com.dev->rdev, skb);
        return credits;
 }
@@ -1827,8 +1733,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
                                (ep->mpa_pkt + sizeof(*mpa));
                        ep->ird = ntohs(mpa_v2_params->ird) &
                                MPA_V2_IRD_ORD_MASK;
+                       ep->ird = min_t(u32, ep->ird,
+                                       cur_max_read_depth(ep->com.dev));
                        ep->ord = ntohs(mpa_v2_params->ord) &
                                MPA_V2_IRD_ORD_MASK;
+                       ep->ord = min_t(u32, ep->ord,
+                                       cur_max_read_depth(ep->com.dev));
                        PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
                             ep->ord);
                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
@@ -1966,7 +1876,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
        struct sk_buff *skb;
        struct fw_ofld_connection_wr *req;
        unsigned int mtu_idx;
-       int wscale;
+       u32 wscale;
        struct sockaddr_in *sin;
        int win;
 
@@ -1991,10 +1901,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
        req->tcb.tx_max = (__force __be32) jiffies;
        req->tcb.rcv_adv = htons(1);
-       best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-                enable_tcp_timestamps,
-                (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-       wscale = compute_wscale(rcv_win);
+       cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                     enable_tcp_timestamps,
+                     (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+       wscale = cxgb_compute_wscale(rcv_win);
 
        /*
         * Specify the largest window that will fit in opt0. The
@@ -2048,15 +1958,6 @@ static inline int act_open_has_tid(int status)
                status != CPL_ERR_CONN_EXIST);
 }
 
-/* Returns whether a CPL status conveys negative advice.
- */
-static int is_neg_adv(unsigned int status)
-{
-       return status == CPL_ERR_RTX_NEG_ADVICE ||
-              status == CPL_ERR_PERSIST_NEG_ADVICE ||
-              status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
 static char *neg_adv_str(unsigned int status)
 {
        switch (status) {
@@ -2210,16 +2111,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 
        /* find a route */
        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
-               ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
-                                    raddr->sin_addr.s_addr, laddr->sin_port,
-                                    raddr->sin_port, ep->com.cm_id->tos);
+               ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+                                         laddr->sin_addr.s_addr,
+                                         raddr->sin_addr.s_addr,
+                                         laddr->sin_port,
+                                         raddr->sin_port, ep->com.cm_id->tos);
                iptype = 4;
                ra = (__u8 *)&raddr->sin_addr;
        } else {
-               ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
-                                     raddr6->sin6_addr.s6_addr,
-                                     laddr6->sin6_port, raddr6->sin6_port, 0,
-                                     raddr6->sin6_scope_id);
+               ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+                                          get_real_dev,
+                                          laddr6->sin6_addr.s6_addr,
+                                          raddr6->sin6_addr.s6_addr,
+                                          laddr6->sin6_port,
+                                          raddr6->sin6_port, 0,
+                                          raddr6->sin6_scope_id);
                iptype = 6;
                ra = (__u8 *)&raddr6->sin6_addr;
        }
@@ -2291,7 +2197,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
        PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
             status, status2errno(status));
 
-       if (is_neg_adv(status)) {
+       if (cxgb_is_neg_adv(status)) {
                PDBG("%s Connection problems for atid %u status %u (%s)\n",
                     __func__, atid, status, neg_adv_str(status));
                ep->stats.connect_neg_adv++;
@@ -2418,7 +2324,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
        unsigned int mtu_idx;
        u64 opt0;
        u32 opt2;
-       int wscale;
+       u32 wscale;
        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
        int win;
        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
@@ -2439,10 +2345,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
                                                    ep->hwtid));
 
-       best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-                enable_tcp_timestamps && req->tcpopt.tstamp,
-                (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-       wscale = compute_wscale(rcv_win);
+       cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+                     enable_tcp_timestamps && req->tcpopt.tstamp,
+                     (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+       wscale = cxgb_compute_wscale(rcv_win);
 
        /*
         * Specify the largest window that will fit in opt0. The
@@ -2514,42 +2420,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
        return;
 }
 
-static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
-                      int *iptype, __u8 *local_ip, __u8 *peer_ip,
-                      __be16 *local_port, __be16 *peer_port)
-{
-       int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
-                     ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
-                     T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-       int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
-                    IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
-                    T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-       struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
-       struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
-       struct tcphdr *tcp = (struct tcphdr *)
-                            ((u8 *)(req + 1) + eth_len + ip_len);
-
-       if (ip->version == 4) {
-               PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
-                    ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
-                    ntohs(tcp->dest));
-               *iptype = 4;
-               memcpy(peer_ip, &ip->saddr, 4);
-               memcpy(local_ip, &ip->daddr, 4);
-       } else {
-               PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
-                    ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
-                    ntohs(tcp->dest));
-               *iptype = 6;
-               memcpy(peer_ip, ip6->saddr.s6_addr, 16);
-               memcpy(local_ip, ip6->daddr.s6_addr, 16);
-       }
-       *peer_port = tcp->source;
-       *local_port = tcp->dest;
-
-       return;
-}
-
 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -2578,8 +2448,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
-       get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
-                  local_ip, peer_ip, &local_port, &peer_port);
+       cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+                       &iptype, local_ip, peer_ip, &local_port, &peer_port);
 
        /* Find output route */
        if (iptype == 4)  {
@@ -2587,18 +2457,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                     , __func__, parent_ep, hwtid,
                     local_ip, peer_ip, ntohs(local_port),
                     ntohs(peer_port), peer_mss);
-               dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
-                                local_port, peer_port,
-                                tos);
+               dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+                                     *(__be32 *)local_ip, *(__be32 *)peer_ip,
+                                     local_port, peer_port, tos);
        } else {
                PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
                     , __func__, parent_ep, hwtid,
                     local_ip, peer_ip, ntohs(local_port),
                     ntohs(peer_port), peer_mss);
-               dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
-                                 PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
-                                 ((struct sockaddr_in6 *)
-                                 &parent_ep->com.local_addr)->sin6_scope_id);
+               dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+                               local_ip, peer_ip, local_port, peer_port,
+                               PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+                               ((struct sockaddr_in6 *)
+                                &parent_ep->com.local_addr)->sin6_scope_id);
        }
        if (!dst) {
                printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@ -2831,18 +2702,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
        struct cpl_abort_req_rss *req = cplhdr(skb);
        struct c4iw_ep *ep;
-       struct cpl_abort_rpl *rpl;
        struct sk_buff *rpl_skb;
        struct c4iw_qp_attributes attrs;
        int ret;
        int release = 0;
        unsigned int tid = GET_TID(req);
+       u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
        ep = get_ep_from_tid(dev, tid);
        if (!ep)
                return 0;
 
-       if (is_neg_adv(req->status)) {
+       if (cxgb_is_neg_adv(req->status)) {
                PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
                     __func__, ep->hwtid, req->status,
                     neg_adv_str(req->status));
@@ -2935,11 +2806,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
                release = 1;
                goto out;
        }
-       set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
-       rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
-       INIT_TP_WR(rpl, ep->hwtid);
-       OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
-       rpl->cmd = CPL_ABORT_NO_RST;
+
+       cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+
        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
 out:
        if (release)
@@ -3136,7 +3005,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
                if (conn_param->ord > ep->ird) {
                        if (RELAXED_IRD_NEGOTIATION) {
-                               ep->ord = ep->ird;
+                               conn_param->ord = ep->ird;
                        } else {
                                ep->ird = conn_param->ird;
                                ep->ord = conn_param->ord;
@@ -3371,9 +3240,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
                     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
                     ra, ntohs(raddr->sin_port));
-               ep->dst = find_route(dev, laddr->sin_addr.s_addr,
-                                    raddr->sin_addr.s_addr, laddr->sin_port,
-                                    raddr->sin_port, cm_id->tos);
+               ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+                                         laddr->sin_addr.s_addr,
+                                         raddr->sin_addr.s_addr,
+                                         laddr->sin_port,
+                                         raddr->sin_port, cm_id->tos);
        } else {
                iptype = 6;
                ra = (__u8 *)&raddr6->sin6_addr;
@@ -3392,10 +3263,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                     __func__, laddr6->sin6_addr.s6_addr,
                     ntohs(laddr6->sin6_port),
                     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
-               ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
-                                     raddr6->sin6_addr.s6_addr,
-                                     laddr6->sin6_port, raddr6->sin6_port, 0,
-                                     raddr6->sin6_scope_id);
+               ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+                                          laddr6->sin6_addr.s6_addr,
+                                          raddr6->sin6_addr.s6_addr,
+                                          laddr6->sin6_port,
+                                          raddr6->sin6_port, 0,
+                                          raddr6->sin6_scope_id);
        }
        if (!ep->dst) {
                printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@ -4037,8 +3910,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
             ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
             ntohs(tcph->source), iph->tos);
 
-       dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
-                        iph->tos);
+       dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+                             iph->daddr, iph->saddr, tcph->dest,
+                             tcph->source, iph->tos);
        if (!dst) {
                pr_err("%s - failed to find dst entry!\n",
                       __func__);
@@ -4313,7 +4187,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
                kfree_skb(skb);
                return 0;
        }
-       if (is_neg_adv(req->status)) {
+       if (cxgb_is_neg_adv(req->status)) {
                PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
                     __func__, ep->hwtid, req->status,
                     neg_adv_str(req->status));
index 812ab72..ac926c9 100644
@@ -1016,15 +1016,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
        struct c4iw_cq *chp;
-       int ret;
+       int ret = 0;
        unsigned long flag;
 
        chp = to_c4iw_cq(ibcq);
        spin_lock_irqsave(&chp->lock, flag);
-       ret = t4_arm_cq(&chp->cq,
-                       (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+       t4_arm_cq(&chp->cq,
+                 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+       if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+               ret = t4_cq_notempty(&chp->cq);
        spin_unlock_irqrestore(&chp->lock, flag);
-       if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
-               ret = 0;
        return ret;
 }
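
The reworked c4iw_arm_cq() now implements the IB_CQ_REPORT_MISSED_EVENTS
contract: a nonzero return means the CQ was not empty at arm time. A
consumer-side sketch of how a ULP typically uses that return value, given a
struct ib_cq *cq (handle_completion() is a hypothetical callback, not part
of this patch):

        struct ib_wc wc;

        /* A positive return says CQEs may already be queued, so poll
         * again before sleeping to avoid a missed wakeup. */
        while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
                                IB_CQ_REPORT_MISSED_EVENTS) > 0)
                while (ib_poll_cq(cq, 1, &wc) > 0)
                        handle_completion(&wc);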
index aa47e0a..6a9bef1 100644
@@ -881,15 +881,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
        return cm_id->provider_data;
 }
 
-static inline int compute_wscale(int win)
-{
-       int wscale = 0;
-
-       while (wscale < 14 && (65535<<wscale) < win)
-               wscale++;
-       return wscale;
-}
-
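
The removed helper picked the smallest TCP window-scale shift that covers
the receive window, capped at 14. Worked example: a 256 KB window (262144
bytes) yields wscale = 3, since 65535 << 2 = 262140 still falls short while
65535 << 3 = 524280 covers it. cxgb_compute_wscale() is assumed to carry
this same loop into the shared libcxgb code.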
 static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
 {
 #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
index edb1172..6904352 100644
@@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
        return 0;
 }
 
-void _free_qp(struct kref *kref)
+static void _free_qp(struct kref *kref)
 {
        struct c4iw_qp *qhp;
 
index 6126bbe..02173f4 100644
@@ -634,6 +634,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
        return (CQE_GENBIT(cqe) == cq->gen);
 }
 
+static inline int t4_cq_notempty(struct t4_cq *cq)
+{
+       return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
+}
+
 static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 {
        int ret;
index 79575ee..0566393 100644
@@ -47,7 +47,6 @@
 #include <linux/topology.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
-#include <linux/cpumask.h>
 
 #include "hfi.h"
 #include "affinity.h"
@@ -682,7 +681,7 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
                           size_t count)
 {
        struct hfi1_affinity_node *entry;
-       struct cpumask mask;
+       cpumask_var_t mask;
        int ret, i;
 
        spin_lock(&node_affinity.lock);
@@ -692,19 +691,24 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
        if (!entry)
                return -EINVAL;
 
-       ret = cpulist_parse(buf, &mask);
+       ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+       if (!ret)
+               return -ENOMEM;
+
+       ret = cpulist_parse(buf, mask);
        if (ret)
-               return ret;
+               goto out;
 
-       if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+       if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
                dd_dev_warn(dd, "Invalid CPU mask\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
 
        mutex_lock(&sdma_affinity_mutex);
        /* reset the SDMA interrupt affinity details */
        init_cpu_mask_set(&entry->def_intr);
-       cpumask_copy(&entry->def_intr.mask, &mask);
+       cpumask_copy(&entry->def_intr.mask, mask);
        /*
         * Reassign the affinity for each SDMA interrupt.
         */
@@ -720,8 +724,9 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
                if (ret)
                        break;
        }
-
        mutex_unlock(&sdma_affinity_mutex);
+out:
+       free_cpumask_var(mask);
        return ret ? ret : strnlen(buf, PAGE_SIZE);
 }
 
index b32638d..cc38004 100644
@@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd)
        write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
 }
 
+/*
+ * Perform a test read on the QSFP.  Return 0 on success, -ERRNO
+ * on error.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+       int ret;
+       u8 status;
+
+       /* report success if not a QSFP */
+       if (ppd->port_type != PORT_TYPE_QSFP)
+               return 0;
+
+       /* read byte 2, the status byte */
+       ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+       if (ret < 0)
+               return ret;
+       if (ret != 1)
+               return -EIO;
+
+       return 0; /* success */
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms).  The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Try a QSFP read.  If it fails, schedule a retry for later.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+       if (test_qsfp_read(ppd)) {
+               /* read failed */
+               if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+                       dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+                       return;
+               }
+               dd_dev_info(ppd->dd,
+                           "QSFP not responding, waiting and retrying %d\n",
+                           (int)ppd->qsfp_retry_count);
+               ppd->qsfp_retry_count++;
+               queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+                                  msecs_to_jiffies(QSFP_RETRY_WAIT));
+               return;
+       }
+       ppd->qsfp_retry_count = 0;
+
+       /*
+        * Tune the SerDes to a ballpark setting for optimal signal and bit
+        * error rate.  Needs to be done before starting the link.
+        */
+       tune_serdes(ppd);
+       start_link(ppd);
+}
+
+/*
+ * Workqueue function to start the link after a delay.
+ */
+void handle_start_link(struct work_struct *work)
+{
+       struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+                                                 start_link_work.work);
+       try_start_link(ppd);
+}
+
 int bringup_serdes(struct hfi1_pportdata *ppd)
 {
        struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
                set_qsfp_int_n(ppd, 1);
        }
 
-       /*
-        * Tune the SerDes to a ballpark setting for
-        * optimal signal and bit error rate
-        * Needs to be done before starting the link
-        */
-       tune_serdes(ppd);
-
-       return start_link(ppd);
+       try_start_link(ppd);
+       return 0;
 }
 
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
        ppd->driver_link_ready = 0;
        ppd->link_enabled = 0;
 
+       ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+       flush_delayed_work(&ppd->start_link_work);
+       cancel_delayed_work_sync(&ppd->start_link_work);
+
        ppd->offline_disabled_reason =
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
        set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
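
Note the teardown ordering above: qsfp_retry_count is saturated first so a
try_start_link() instance already running on the workqueue gives up rather
than requeueing itself, and only then does the flush/cancel pair retire the
delayed work for good.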
@@ -12865,7 +12935,7 @@ fail:
  */
 static int set_up_context_variables(struct hfi1_devdata *dd)
 {
-       int num_kernel_contexts;
+       unsigned long num_kernel_contexts;
        int total_contexts;
        int ret;
        unsigned ngroups;
@@ -12894,9 +12964,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
         */
        if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
                dd_dev_err(dd,
-                          "Reducing # kernel rcv contexts to: %d, from %d\n",
+                          "Reducing # kernel rcv contexts to: %d, from %lu\n",
                           (int)(dd->chip_send_contexts - num_vls - 1),
-                          (int)num_kernel_contexts);
+                          num_kernel_contexts);
                num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
        }
        /*
index ed11107..e295737 100644
@@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work);
 void handle_link_down(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
 void reset_qsfp(struct hfi1_pportdata *ppd);
 void qsfp_event(struct work_struct *work);
index dbab9d9..5e9be16 100644
 
 static struct dentry *hfi1_dbg_root;
 
+/* wrappers to enforce srcu in seq file */
+static ssize_t hfi1_seq_read(
+       struct file *file,
+       char __user *buf,
+       size_t size,
+       loff_t *ppos)
+{
+       struct dentry *d = file->f_path.dentry;
+       int srcu_idx;
+       ssize_t r;
+
+       r = debugfs_use_file_start(d, &srcu_idx);
+       if (likely(!r))
+               r = seq_read(file, buf, size, ppos);
+       debugfs_use_file_finish(srcu_idx);
+       return r;
+}
+
+static loff_t hfi1_seq_lseek(
+       struct file *file,
+       loff_t offset,
+       int whence)
+{
+       struct dentry *d = file->f_path.dentry;
+       int srcu_idx;
+       loff_t r;
+
+       r = debugfs_use_file_start(d, &srcu_idx);
+       if (likely(!r))
+               r = seq_lseek(file, offset, whence);
+       debugfs_use_file_finish(srcu_idx);
+       return r;
+}
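
These wrappers rely on debugfs_use_file_start()/debugfs_use_file_finish(),
the SRCU-based guard debugfs offers against a file being removed while a
read is in flight; with reads pinned that way, the driver-wide
rcu_read_lock()/rcu_read_unlock() pairs dropped throughout the rest of this
file are no longer needed.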
+
 #define private2dd(file) (file_inode(file)->i_private)
 #define private2ppd(file) (file_inode(file)->i_private)
 
@@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \
 static const struct file_operations _##name##_file_ops = { \
        .owner   = THIS_MODULE, \
        .open    = _##name##_open, \
-       .read    = seq_read, \
-       .llseek  = seq_lseek, \
+       .read    = hfi1_seq_read, \
+       .llseek  = hfi1_seq_lseek, \
        .release = seq_release \
 }
 
@@ -105,11 +139,9 @@ do { \
        DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
 
 static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
        struct hfi1_opcode_stats_perctx *opstats;
 
-       rcu_read_lock();
        if (*pos >= ARRAY_SIZE(opstats->stats))
                return NULL;
        return pos;
@@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-       rcu_read_unlock();
 }
 
 static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
 DEBUGFS_FILE_OPS(ctx_stats);
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
+       __acquires(RCU)
 {
        struct qp_iter *iter;
        loff_t n = *pos;
 
-       rcu_read_lock();
        iter = qp_iter_init(s->private);
+
+       /* stop calls rcu_read_unlock */
+       rcu_read_lock();
+
        if (!iter)
                return NULL;
 
-       while (n--) {
+       do {
                if (qp_iter_next(iter)) {
                        kfree(iter);
                        return NULL;
                }
-       }
+       } while (n--);
 
        return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
                                loff_t *pos)
+       __must_hold(RCU)
 {
        struct qp_iter *iter = iter_ptr;
 
@@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
-__releases(RCU)
+       __releases(RCU)
 {
        rcu_read_unlock();
 }
@@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats)
 DEBUGFS_FILE_OPS(qp_stats);
 
 static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
        struct hfi1_ibdev *ibd;
        struct hfi1_devdata *dd;
 
-       rcu_read_lock();
        ibd = (struct hfi1_ibdev *)s->private;
        dd = dd_from_dev(ibd);
        if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-       rcu_read_unlock();
 }
 
 static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
        struct hfi1_devdata *dd;
        ssize_t rval;
 
-       rcu_read_lock();
        dd = private2dd(file);
        avail = hfi1_read_cntrs(dd, NULL, &counters);
        rval =  simple_read_from_buffer(buf, count, ppos, counters, avail);
-       rcu_read_unlock();
        return rval;
 }
 
@@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
        struct hfi1_devdata *dd;
        ssize_t rval;
 
-       rcu_read_lock();
        dd = private2dd(file);
        avail = hfi1_read_cntrs(dd, &names, NULL);
        rval =  simple_read_from_buffer(buf, count, ppos, names, avail);
-       rcu_read_unlock();
        return rval;
 }
 
@@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
        struct hfi1_devdata *dd;
        ssize_t rval;
 
-       rcu_read_lock();
        dd = private2dd(file);
        avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
        rval = simple_read_from_buffer(buf, count, ppos, names, avail);
-       rcu_read_unlock();
        return rval;
 }
 
@@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
        struct hfi1_pportdata *ppd;
        ssize_t rval;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
        avail = hfi1_read_portcntrs(ppd, NULL, &counters);
        rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
-       rcu_read_unlock();
        return rval;
 }
 
@@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
        int used;
        int i;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
        dd = ppd->dd;
        size = PAGE_SIZE;
        used = 0;
        tmp = kmalloc(size, GFP_KERNEL);
-       if (!tmp) {
-               rcu_read_unlock();
+       if (!tmp)
                return -ENOMEM;
-       }
 
        scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
        used += scnprintf(tmp + used, size - used,
@@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
        used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
 
        ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
-       rcu_read_unlock();
        kfree(tmp);
        return ret;
 }
@@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
        u64 scratch0;
        u64 clear;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
        dd = ppd->dd;
 
        buff = kmalloc(count + 1, GFP_KERNEL);
-       if (!buff) {
-               ret = -ENOMEM;
-               goto do_return;
-       }
+       if (!buff)
+               return -ENOMEM;
 
        ret = copy_from_user(buff, buf, count);
        if (ret > 0) {
@@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
 
  do_free:
        kfree(buff);
- do_return:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
        char *tmp;
        int ret;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
        tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!tmp) {
-               rcu_read_unlock();
+       if (!tmp)
                return -ENOMEM;
-       }
 
        ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
        if (ret > 0)
                ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
-       rcu_read_unlock();
        kfree(tmp);
        return ret;
 }
@@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
        int offset;
        int total_written;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
 
        /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
        offset = *ppos & 0xffff;
 
        /* explicitly reject invalid address 0 to catch cp and cat */
-       if (i2c_addr == 0) {
-               ret = -EINVAL;
-               goto _return;
-       }
+       if (i2c_addr == 0)
+               return -EINVAL;
 
        buff = kmalloc(count, GFP_KERNEL);
-       if (!buff) {
-               ret = -ENOMEM;
-               goto _return;
-       }
+       if (!buff)
+               return -ENOMEM;
 
        ret = copy_from_user(buff, buf, count);
        if (ret > 0) {
@@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 
  _free:
        kfree(buff);
- _return:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
        int offset;
        int total_read;
 
-       rcu_read_lock();
        ppd = private2ppd(file);
 
        /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
        offset = *ppos & 0xffff;
 
        /* explicitly reject invalid address 0 to catch cp and cat */
-       if (i2c_addr == 0) {
-               ret = -EINVAL;
-               goto _return;
-       }
+       if (i2c_addr == 0)
+               return -EINVAL;
 
        buff = kmalloc(count, GFP_KERNEL);
-       if (!buff) {
-               ret = -ENOMEM;
-               goto _return;
-       }
+       if (!buff)
+               return -ENOMEM;
 
        total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
        if (total_read < 0) {
@@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
 
  _free:
        kfree(buff);
- _return:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
        int ret;
        int total_written;
 
-       rcu_read_lock();
-       if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
-               ret = -EINVAL;
-               goto _return;
-       }
+       if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+               return -EINVAL;
 
        ppd = private2ppd(file);
 
        buff = kmalloc(count, GFP_KERNEL);
-       if (!buff) {
-               ret = -ENOMEM;
-               goto _return;
-       }
+       if (!buff)
+               return -ENOMEM;
 
        ret = copy_from_user(buff, buf, count);
        if (ret > 0) {
                ret = -EFAULT;
                goto _free;
        }
-
        total_written = qsfp_write(ppd, target, *ppos, buff, count);
        if (total_written < 0) {
                ret = total_written;
@@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
 
  _free:
        kfree(buff);
- _return:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
        int ret;
        int total_read;
 
-       rcu_read_lock();
        if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
                ret = -EINVAL;
                goto _return;
@@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
  _free:
        kfree(buff);
  _return:
-       rcu_read_unlock();
        return ret;
 }
 
@@ -1006,7 +991,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
        debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
 out:
        ibd->hfi1_ibdev_dbg = NULL;
-       synchronize_rcu();
 }
 
 /*
@@ -1031,9 +1015,7 @@ static const char * const hfi1_statnames[] = {
 };
 
 static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
-       rcu_read_lock();
        if (*pos >= ARRAY_SIZE(hfi1_statnames))
                return NULL;
        return pos;
@@ -1051,9 +1033,7 @@ static void *_driver_stats_names_seq_next(
 }
 
 static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-       rcu_read_unlock();
 }
 
 static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1069,9 +1049,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
 DEBUGFS_FILE_OPS(driver_stats_names);
 
 static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
-       rcu_read_lock();
        if (*pos >= ARRAY_SIZE(hfi1_statnames))
                return NULL;
        return pos;
@@ -1086,9 +1064,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-       rcu_read_unlock();
 }
 
 static u64 hfi1_sps_ints(void)
index 8246dc7..303f105 100644
@@ -888,14 +888,15 @@ void set_all_slowpath(struct hfi1_devdata *dd)
 }
 
 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
-                                     struct hfi1_packet packet,
+                                     struct hfi1_packet *packet,
                                      struct hfi1_devdata *dd)
 {
        struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
-       struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
-                                                            packet.rhf_addr);
+       struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+                                                            packet->rhf_addr);
+       u8 etype = rhf_rcv_type(packet->rhf);
 
-       if (hdr2sc(hdr, packet.rhf) != 0xf) {
+       if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
                int hwstate = read_logical_state(dd);
 
                if (hwstate != LSTATE_ACTIVE) {
@@ -979,7 +980,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
                        /* Auto activate link on non-SC15 packet receive */
                        if (unlikely(rcd->ppd->host_link_state ==
                                     HLS_UP_ARMED) &&
-                           set_armed_to_active(rcd, packet, dd))
+                           set_armed_to_active(rcd, &packet, dd))
                                goto bail;
                        last = process_rcv_packet(&packet, thread);
                }
index 1ecbec1..7e03ccd 100644
@@ -183,6 +183,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
        if (fd) {
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
+               atomic_inc(&fd->mm->mm_count);
        }
 
        fp->private_data = fd;
@@ -222,7 +223,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
                ret = assign_ctxt(fp, &uinfo);
                if (ret < 0)
                        return ret;
-               setup_ctxt(fp);
+               ret = setup_ctxt(fp);
                if (ret)
                        return ret;
                ret = user_init(fp);
@@ -779,6 +780,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        mutex_unlock(&hfi1_mutex);
        hfi1_free_ctxtdata(dd, uctxt);
 done:
+       mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
        kfree(fdata);
        return 0;
index 1000e0f..325ec21 100644
@@ -605,6 +605,7 @@ struct hfi1_pportdata {
        struct work_struct freeze_work;
        struct work_struct link_downgrade_work;
        struct work_struct link_bounce_work;
+       struct delayed_work start_link_work;
        /* host link state variables */
        struct mutex hls_lock;
        u32 host_link_state;
@@ -659,6 +660,7 @@ struct hfi1_pportdata {
        u8 linkinit_reason;
        u8 local_tx_rate;       /* rate given to 8051 firmware */
        u8 last_pstate;         /* info only */
+       u8 qsfp_retry_count;
 
        /* placeholders for IB MAD packet settings */
        u8 overrun_threshold;
@@ -1272,9 +1274,26 @@ static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
               ((!!(rhf_dc_info(rhf))) << 4);
 }
 
+#define HFI1_JKEY_WIDTH       16
+#define HFI1_JKEY_MASK        (BIT(16) - 1)
+#define HFI1_ADMIN_JKEY_RANGE 32
+
+/*
+ * J_KEYs are split and allocated in the following groups:
+ *   0 - 31    - users with administrator privileges
+ *  32 - 63    - kernel protocols using KDETH packets
+ *  64 - 65535 - all other users using KDETH packets
+ */
 static inline u16 generate_jkey(kuid_t uid)
 {
-       return from_kuid(current_user_ns(), uid) & 0xffff;
+       u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
+
+       if (capable(CAP_SYS_ADMIN))
+               jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
+       else if (jkey < 64)
+               jkey |= BIT(HFI1_JKEY_WIDTH - 1);
+
+       return jkey;
 }
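
A worked example with hypothetical values: a non-privileged user whose uid
masks to 33 would land in the kernel-reserved 32 - 63 window, so BIT(15) is
set and the J_KEY becomes 32801 (0x8021), safely inside the 64 - 65535 user
range; the same uid with CAP_SYS_ADMIN is masked down to 33 & 31 = 1,
inside the administrator group.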
 
 /*
@@ -1656,7 +1675,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
                                  const struct pci_device_id *);
 void hfi1_free_devdata(struct hfi1_devdata *);
-void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
 /* LED beaconing functions */
@@ -1788,7 +1806,7 @@ extern unsigned int hfi1_max_mtu;
 extern unsigned int hfi1_cu;
 extern unsigned int user_credit_return_threshold;
 extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
 extern uint krcvqs[];
 extern int krcvqsset;
 extern uint kdeth_qp;
index a358d23..384b43d 100644
@@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
 MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
 
 static unsigned hfi1_rcvarr_split = 25;
 module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -500,6 +500,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
        INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
        INIT_WORK(&ppd->sma_message_work, handle_sma_message);
        INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+       INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
        INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
        INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
@@ -1333,7 +1334,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
                spin_unlock(&ppd->cc_state_lock);
 
                if (cc_state)
-                       call_rcu(&cc_state->rcu, cc_state_reclaim);
+                       kfree_rcu(cc_state, rcu);
        }
 
        free_credit_return(dd);
index 1263abe..7ffc14f 100644
@@ -1819,6 +1819,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
        u32 len = OPA_AM_CI_LEN(am) + 1;
        int ret;
 
+       if (dd->pport->port_type != PORT_TYPE_QSFP) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
@@ -2599,7 +2604,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
        u8 lq, num_vls;
        u8 res_lli, res_ler;
        u64 port_mask;
-       unsigned long port_num;
+       u8 port_num;
        unsigned long vl;
        u32 vl_select_mask;
        int vfi;
@@ -2633,9 +2638,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
         */
        port_mask = be64_to_cpu(req->port_select_mask[3]);
        port_num = find_first_bit((unsigned long *)&port_mask,
-                                 sizeof(port_mask));
+                                 sizeof(port_mask) * 8);
 
-       if ((u8)port_num != port) {
+       if (port_num != port) {
                pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)pmp);
        }
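
find_first_bit() takes its search limit in bits, not bytes: with
sizeof(port_mask) == 8 the old calls only ever scanned the lowest byte of
the 64-bit port-select mask, which the "* 8" in this and the following
hunks corrects.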
@@ -2837,7 +2842,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
         */
        port_mask = be64_to_cpu(req->port_select_mask[3]);
        port_num = find_first_bit((unsigned long *)&port_mask,
-                                 sizeof(port_mask));
+                                 sizeof(port_mask) * 8);
 
        if (port_num != port) {
                pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3010,7 +3015,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
         */
        port_mask = be64_to_cpu(req->port_select_mask[3]);
        port_num = find_first_bit((unsigned long *)&port_mask,
-                                 sizeof(port_mask));
+                                 sizeof(port_mask) * 8);
 
        if (port_num != port) {
                pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3247,7 +3252,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
         */
        port_mask = be64_to_cpu(req->port_select_mask[3]);
        port_num = find_first_bit((unsigned long *)&port_mask,
-                                 sizeof(port_mask));
+                                 sizeof(port_mask) * 8);
 
        if (port_num != port) {
                pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3398,7 +3403,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
 
        spin_unlock(&ppd->cc_state_lock);
 
-       call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+       kfree_rcu(old_cc_state, rcu);
 }
 
 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
@@ -3553,13 +3558,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        return reply((struct ib_mad_hdr *)smp);
 }
 
-void cc_state_reclaim(struct rcu_head *rcu)
-{
-       struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
-
-       kfree(cc_state);
-}
-
 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
                                   u32 *resp_len)
index 8c25e1b..3a1ef30 100644
@@ -771,6 +771,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
                        read_extra_bytes(pbuf, from, to_fill);
                        from += to_fill;
                        nbytes -= to_fill;
+                       /* may not be enough valid bytes left to align */
+                       if (extra > nbytes)
+                               extra = nbytes;
 
                        /* ...now write carry */
                        dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +801,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
                        read_low_bytes(pbuf, from, extra);
                        from += extra;
                        nbytes -= extra;
+                       /*
+                        * If no bytes are left, return early - we are done.
+                        * NOTE: This short-circuit is *required* because
+                        * "extra" may have been reduced in size and "from"
+                        * may therefore lack the quad-word alignment that
+                        * the code after this if block requires.
+                        */
+                       if (nbytes == 0)
+                               return;
                }
 
                /* at this point, from is QW aligned */
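
A worked illustration of the two new guards, with hypothetical byte counts:
if completing the carry needs 5 more source bytes but only nbytes = 3
remain, "extra" is clamped to 3; once those bytes are consumed nbytes hits
zero, and the early return matters because "from" never reached the
quad-word alignment the code past the if block depends on.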
index a5aa351..4e4d831 100644
@@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
 
        iter->dev = dev;
        iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
-       if (qp_iter_next(iter)) {
-               kfree(iter);
-               return NULL;
-       }
 
        return iter;
 }
index a207717..4e95ad8 100644
@@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
                   u8 *data)
 {
        struct hfi1_pportdata *ppd;
-       u32 excess_len = 0;
-       int ret = 0;
+       u32 excess_len = len;
+       int ret = 0, offset = 0;
 
        if (port_num > dd->num_pports || port_num < 1) {
                dd_dev_info(dd, "%s: Invalid port number %d\n",
@@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
        }
 
        memcpy(data, &ppd->qsfp_info.cache[addr], len);
+
+       if (addr <= QSFP_MONITOR_VAL_END &&
+           (addr + len) >= QSFP_MONITOR_VAL_START) {
+               /* Overlap with the dynamic channel monitor range */
+               if (addr < QSFP_MONITOR_VAL_START) {
+                       if (addr + len <= QSFP_MONITOR_VAL_END)
+                               len = addr + len - QSFP_MONITOR_VAL_START;
+                       else
+                               len = QSFP_MONITOR_RANGE;
+                       offset = QSFP_MONITOR_VAL_START - addr;
+                       addr = QSFP_MONITOR_VAL_START;
+               } else if (addr == QSFP_MONITOR_VAL_START) {
+                       offset = 0;
+                       if (addr + len > QSFP_MONITOR_VAL_END)
+                               len = QSFP_MONITOR_RANGE;
+               } else {
+                       offset = 0;
+                       if (addr + len > QSFP_MONITOR_VAL_END)
+                               len = QSFP_MONITOR_VAL_END - addr + 1;
+               }
+               /* Refresh the values of the dynamic monitors from the cable */
+               ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len);
+               if (ret != len) {
+                       ret = -EAGAIN;
+                       goto set_zeroes;
+               }
+       }
+
        return 0;
 
 set_zeroes:
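
A worked example of the clamping, using hypothetical request values: a read
at addr = 10 for len = 20 overlaps the 22..81 monitor window, so len
becomes 30 - 22 = 8 and offset becomes 12, and only cable bytes 22..29 are
re-read and laid over the cached copy at data + 12, while the static bytes
10..21 are still served from the cache.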
index 69275eb..36cf523 100644
@@ -74,6 +74,9 @@
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
+#define QSFP_MONITOR_VAL_START 22
+#define QSFP_MONITOR_VAL_END 81
+#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1)
 #define QSFP_TX_CTRL_BYTE_OFFS 86
 #define QSFP_PWR_CTRL_BYTE_OFFS 93
 #define QSFP_CDR_CTRL_BYTE_OFFS 98
index 0ecf279..1694037 100644
@@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 #define KDETH_HCRC_LOWER_SHIFT    24
 #define KDETH_HCRC_LOWER_MASK     0xff
 
+#define AHG_KDETH_INTR_SHIFT 12
+
 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
 #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
 
@@ -1480,7 +1482,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                /* Clear KDETH.SH on last packet */
                if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
                        val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
-                                                               INTR) >> 16);
+                                                    INTR) <<
+                                          AHG_KDETH_INTR_SHIFT);
                        val &= cpu_to_le16(~(1U << 13));
                        AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
                } else {
index b738acd..8ec09e4 100644
@@ -232,7 +232,7 @@ struct i40iw_device {
        struct i40e_client *client;
        struct i40iw_hw hw;
        struct i40iw_cm_core cm_core;
-       unsigned long *mem_resources;
+       u8 *mem_resources;
        unsigned long *allocated_qps;
        unsigned long *allocated_cqs;
        unsigned long *allocated_mrs;
@@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
        *next = resource_num + 1;
        if (*next == max_resources)
                *next = 0;
-       spin_unlock_irqrestore(&iwdev->resource_lock, flags);
        *req_resource_num = resource_num;
+       spin_unlock_irqrestore(&iwdev->resource_lock, flags);
 
        return 0;
 }
index 5026dc7..7ca0638 100644
@@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
                buf += hdr_len;
        }
 
-       if (pd_len)
-               memcpy(buf, pdata->addr, pd_len);
+       if (pdata && pdata->addr)
+               memcpy(buf, pdata->addr, pdata->size);
 
        atomic_set(&sqbuf->refcount, 1);
 
@@ -3346,26 +3346,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp)
        return 0;
 }
 
-/**
- * i40iw_loopback_nop - Send a nop
- * @qp: associated hw qp
- */
-static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
-{
-       u64 *wqe;
-       u64 header;
-
-       wqe = qp->qp_uk.sq_base->elem;
-       set_64bit_val(wqe, 0, 0);
-       set_64bit_val(wqe, 8, 0);
-       set_64bit_val(wqe, 16, 0);
-
-       header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
-           LS_64(0, I40IWQPSQ_SIGCOMPL) |
-           LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-       set_64bit_val(wqe, 24, header);
-}
-
 /**
  * i40iw_qp_disconnect - free qp and close cm
  * @iwqp: associate qp for the connection
@@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        } else {
                if (iwqp->page)
                        iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
-               i40iw_loopback_nop(&iwqp->sc_qp);
+               dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
        }
 
        if (iwqp->page)
index 3ee0cad..0c92a40 100644 (file)
@@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
                info.dont_send_fin = false;
        if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
                info.reset_tcp_conn = true;
+       iwqp->hw_iwarp_state = state;
        i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
 }
 
index 6e90813..445e230 100644 (file)
@@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = {
        .notifier_call = i40iw_net_event
 };
 
-static int i40iw_notifiers_registered;
+static atomic_t i40iw_notifiers_registered;
 
 /**
  * i40iw_find_i40e_handler - find a handler given a client info
@@ -1342,12 +1342,11 @@ exit:
  */
 static void i40iw_register_notifiers(void)
 {
-       if (!i40iw_notifiers_registered) {
+       if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
                register_inetaddr_notifier(&i40iw_inetaddr_notifier);
                register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
                register_netevent_notifier(&i40iw_net_notifier);
        }
-       i40iw_notifiers_registered++;
 }
 
 /**
@@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
                        i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
                /* fallthrough */
        case INET_NOTIFIER:
-               if (i40iw_notifiers_registered > 0) {
-                       i40iw_notifiers_registered--;
+               if (!atomic_dec_return(&i40iw_notifiers_registered)) {
                        unregister_netevent_notifier(&i40iw_net_notifier);
                        unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
                        unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
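The plain int counter was incremented and decremented from paths that can race, so registration could be skipped or performed twice. With an atomic_t the transitions are unambiguous: the first user (atomic_inc_return() == 1) registers, the last user (atomic_dec_return() == 0) unregisters. The idiom in isolation, with an illustrative notifier block:

        static atomic_t users;                          /* statically zero */

        static void get_notifiers(void)
        {
                if (atomic_inc_return(&users) == 1)     /* 0 -> 1: first user */
                        register_inetaddr_notifier(&nb);
        }

        static void put_notifiers(void)
        {
                if (!atomic_dec_return(&users))         /* 1 -> 0: last user */
                        unregister_inetaddr_notifier(&nb);
        }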
@@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
        enum i40iw_status_code status;
        struct i40iw_handler *hdl;
 
+       hdl = i40iw_find_netdev(ldev->netdev);
+       if (hdl)
+               return 0;
+
        hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
        if (!hdl)
                return -ENOMEM;
index 0e8db0a..6fd043b 100644 (file)
@@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
 {
        if (!mem)
                return I40IW_ERR_PARAM;
+       /*
+        * mem->va points to the parent of mem, so both mem and mem->va
+        * cannot be touched once mem->va is freed
+        */
        kfree(mem->va);
-       mem->va = NULL;
        return 0;
 }
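The dropped "mem->va = NULL" looked like harmless hygiene but was itself a use-after-free: as the new comment notes, mem->va points at the allocation that contains mem, so once kfree(mem->va) runs, mem is freed memory too. An illustrative layout (not the driver's exact structures):

        struct virt_mem {
                void *va;               /* points at the parent allocation */
                u32   size;
        };

        static struct virt_mem *virt_mem_alloc(u32 payload)
        {
                void *buf = kzalloc(sizeof(struct virt_mem) + payload,
                                    GFP_KERNEL);
                struct virt_mem *mem = buf;     /* descriptor lives inside */

                if (!mem)
                        return NULL;
                mem->va = buf;                  /* ... the buffer it tracks */
                return mem;
        }

        /* kfree(mem->va) releases mem itself, so a subsequent
         * mem->va = NULL would write to freed memory. */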
 
index 2360338..6329c97 100644 (file)
@@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
        return &iwqp->ibqp;
 error:
        i40iw_free_qp_resources(iwdev, iwqp, qp_num);
-       kfree(mem);
        return ERR_PTR(err_code);
 }
 
@@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
                }
                if (iwpbl->pbl_allocated)
                        i40iw_free_pble(iwdev->pble_rsrc, palloc);
-               kfree(iwpbl->iwmr);
-               iwpbl->iwmr = NULL;
+               kfree(iwmr);
                return 0;
        }
 
index d6fc8a6..5df63da 100644 (file)
@@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
                checksum == cpu_to_be16(0xffff);
 }
 
-static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
-                          unsigned tail, struct mlx4_cqe *cqe, int is_eth)
+static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+                           unsigned tail, struct mlx4_cqe *cqe, int is_eth)
 {
        struct mlx4_ib_proxy_sqp_hdr *hdr;
 
@@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
                wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
                wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
        }
-
-       return 0;
 }
 
 static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
@@ -689,12 +687,6 @@ repoll:
        is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                MLX4_CQE_OPCODE_ERROR;
 
-       if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
-                    is_send)) {
-               pr_warn("Completion for NOP opcode detected!\n");
-               return -EINVAL;
-       }
-
        /* Resize CQ in progress */
        if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
                if (cq->resize_buf) {
@@ -720,12 +712,6 @@ repoll:
                 */
                mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
                                       be32_to_cpu(cqe->vlan_my_qpn));
-               if (unlikely(!mqp)) {
-                       pr_warn("CQ %06x with entry for unknown QPN %06x\n",
-                              cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
-                       return -EINVAL;
-               }
-
                *cur_qp = to_mibqp(mqp);
        }
 
@@ -738,11 +724,6 @@ repoll:
                /* SRQ is also in the radix tree */
                msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
                                       srq_num);
-               if (unlikely(!msrq)) {
-                       pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
-                               cq->mcq.cqn, srq_num);
-                       return -EINVAL;
-               }
        }
 
        if (is_send) {
@@ -852,9 +833,11 @@ repoll:
                if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
                        if ((*cur_qp)->mlx4_ib_qp_type &
                            (MLX4_IB_QPT_PROXY_SMI_OWNER |
-                            MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
-                               return use_tunnel_data(*cur_qp, cq, wc, tail,
-                                                      cqe, is_eth);
+                            MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+                               use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
+                                               is_eth);
+                               return 0;
+                       }
                }
 
                wc->slid           = be16_to_cpu(cqe->rlid);
@@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
        struct mlx4_ib_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;
-       int err = 0;
        struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
 
        spin_lock_irqsave(&cq->lock, flags);
@@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
        }
 
        for (npolled = 0; npolled < num_entries; ++npolled) {
-               err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
-               if (err)
+               if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
                        break;
        }
 
@@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 out:
        spin_unlock_irqrestore(&cq->lock, flags);
 
-       if (err == 0 || err == -EAGAIN)
-               return npolled;
-       else
-               return err;
+       return npolled;
 }
 
 int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
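With the NOP, unknown-QPN and unknown-SRQN cases no longer returning hard -EINVALs, mlx4_ib_poll_one()'s return value only means "stop polling", so the err / -EAGAIN bookkeeping in mlx4_ib_poll_cq() collapses to returning npolled; the identical simplification is applied to mlx5_ib_poll_cq() below. The resulting loop shape, sketched:

        spin_lock_irqsave(&cq->lock, flags);
        for (npolled = 0; npolled < num_entries; ++npolled) {
                if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
                        break;          /* no more completions */
        }
        spin_unlock_irqrestore(&cq->lock, flags);
        return npolled;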
index 308a358..5de9a65 100644 (file)
@@ -553,12 +553,6 @@ repoll:
                 * from the table.
                 */
                mqp = __mlx5_qp_lookup(dev->mdev, qpn);
-               if (unlikely(!mqp)) {
-                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
-                                    cq->mcq.cqn, qpn);
-                       return -EINVAL;
-               }
-
                *cur_qp = to_mibqp(mqp);
        }
 
@@ -619,13 +613,6 @@ repoll:
                read_lock(&dev->mdev->priv.mkey_table.lock);
                mmkey = __mlx5_mr_lookup(dev->mdev,
                                         mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
-               if (unlikely(!mmkey)) {
-                       read_unlock(&dev->mdev->priv.mkey_table.lock);
-                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
-                                    cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
-                       return -EINVAL;
-               }
-
                mr = to_mibmr(mmkey);
                get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
                mr->sig->sig_err_exists = true;
@@ -676,7 +663,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
        unsigned long flags;
        int soft_polled = 0;
        int npolled;
-       int err = 0;
 
        spin_lock_irqsave(&cq->lock, flags);
        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -688,8 +674,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
                soft_polled = poll_soft_wc(cq, num_entries, wc);
 
        for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
-               err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
-               if (err)
+               if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
                        break;
        }
 
@@ -698,10 +683,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 out:
        spin_unlock_irqrestore(&cq->lock, flags);
 
-       if (err == 0 || err == -EAGAIN)
-               return soft_polled + npolled;
-       else
-               return err;
+       return soft_polled + npolled;
 }
 
 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@ -747,14 +729,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
 
 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                          struct ib_ucontext *context, struct mlx5_ib_cq *cq,
-                         int entries, struct mlx5_create_cq_mbox_in **cqb,
+                         int entries, u32 **cqb,
                          int *cqe_size, int *index, int *inlen)
 {
        struct mlx5_ib_create_cq ucmd;
        size_t ucmdlen;
        int page_shift;
+       __be64 *pas;
        int npages;
        int ncont;
+       void *cqc;
        int err;
 
        ucmdlen =
@@ -792,14 +776,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
        mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
                    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
 
-       *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+       *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_db;
        }
-       mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-       (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+       pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+       mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+
+       cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+       MLX5_SET(cqc, cqc, log_page_size,
+                page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
        *index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -834,9 +824,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
 
 static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
                            int entries, int cqe_size,
-                           struct mlx5_create_cq_mbox_in **cqb,
-                           int *index, int *inlen)
+                           u32 **cqb, int *index, int *inlen)
 {
+       __be64 *pas;
+       void *cqc;
        int err;
 
        err = mlx5_db_alloc(dev->mdev, &cq->db);
@@ -853,15 +844,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 
        init_cq_buf(cq, &cq->buf);
 
-       *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+       *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_buf;
        }
-       mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-       (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+       mlx5_fill_page_array(&cq->buf.buf, pas);
+
+       cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+       MLX5_SET(cqc, cqc, log_page_size,
+                cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
        *index = dev->mdev->priv.uuari.uars[0].index;
 
        return 0;
@@ -895,11 +892,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 {
        int entries = attr->cqe;
        int vector = attr->comp_vector;
-       struct mlx5_create_cq_mbox_in *cqb = NULL;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_cq *cq;
        int uninitialized_var(index);
        int uninitialized_var(inlen);
+       u32 *cqb = NULL;
+       void *cqc;
        int cqe_size;
        unsigned int irqn;
        int eqn;
@@ -945,19 +943,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
        }
 
-       cq->cqe_size = cqe_size;
-       cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-
-       if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
-               cqb->ctx.cqe_sz_flags |= (1 << 1);
-
-       cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
        if (err)
                goto err_cqb;
 
-       cqb->ctx.c_eqn = cpu_to_be16(eqn);
-       cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+       cq->cqe_size = cqe_size;
+
+       cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
+       MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+       MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+       MLX5_SET(cqc, cqc, uar_page, index);
+       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
+       if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+               MLX5_SET(cqc, cqc, oi, 1);
 
        err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
        if (err)
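This hunk belongs to a wider conversion away from hand-built mailbox structs (mlx5_create_cq_mbox_in and friends, with explicit cpu_to_be*() stores and magic shifts) to the mlx5_ifc device-interface accessors, which derive field offsets, widths and byte order from the interface description. The pattern, reduced to its shape with placeholder variable names:

        /* MLX5_ST_SZ_BYTES/MLX5_FLD_SZ_BYTES size the inbox, MLX5_ADDR_OF
         * locates a sub-structure, MLX5_SET/MLX5_SET64 write fields with
         * the correct endianness and bit placement. */
        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * npages;
        in = mlx5_vzalloc(inlen);
        if (!in)
                return -ENOMEM;

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
        MLX5_SET64(cqc, cqc, dbr_addr, db_dma);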
@@ -1088,27 +1087,15 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
 
 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
-       struct mlx5_modify_cq_mbox_in *in;
        struct mlx5_ib_dev *dev = to_mdev(cq->device);
        struct mlx5_ib_cq *mcq = to_mcq(cq);
        int err;
-       u32 fsel;
 
        if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
                return -ENOSYS;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       in->cqn = cpu_to_be32(mcq->mcq.cqn);
-       fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
-       in->ctx.cq_period = cpu_to_be16(cq_period);
-       in->ctx.cq_max_count = cpu_to_be16(cq_count);
-       in->field_select = cpu_to_be32(fsel);
-       err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
-       kfree(in);
-
+       err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
+                                            cq_period, cq_count);
        if (err)
                mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
 
@@ -1241,9 +1228,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
        struct mlx5_ib_cq *cq = to_mcq(ibcq);
-       struct mlx5_modify_cq_mbox_in *in;
+       void *cqc;
+       u32 *in;
        int err;
        int npas;
+       __be64 *pas;
        int page_shift;
        int inlen;
        int uninitialized_var(cqe_size);
@@ -1285,28 +1274,37 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
        if (err)
                goto ex;
 
-       inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+       inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
+               MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
+
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto ex_resize;
        }
 
+       pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
        if (udata)
                mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
-                                    in->pas, 0);
+                                    pas, 0);
        else
-               mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
-
-       in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
-                                      MLX5_MODIFY_CQ_MASK_PG_OFFSET |
-                                      MLX5_MODIFY_CQ_MASK_PG_SIZE);
-       in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-       in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-       in->ctx.page_offset = 0;
-       in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
-       in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
-       in->cqn = cpu_to_be32(cq->mcq.cqn);
+               mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+
+       MLX5_SET(modify_cq_in, in,
+                modify_field_select_resize_field_select.resize_field_select.resize_field_select,
+                MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
+                MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+                MLX5_MODIFY_CQ_MASK_PG_SIZE);
+
+       cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+
+       MLX5_SET(cqc, cqc, log_page_size,
+                page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+       MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+
+       MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
+       MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
 
        err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
        if (err)
index a84bb76..e4aecbf 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <linux/io-mapping.h>
 #if defined(CONFIG_X86)
 #include <asm/pat.h>
 #endif
@@ -233,23 +232,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num,
                         const union ib_gid *gid,
                         const struct ib_gid_attr *attr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(device);
-       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
-       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+       struct mlx5_ib_dev *dev = to_mdev(device);
+       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
+       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
        void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
 
        if (ll != IB_LINK_LAYER_ETHERNET)
                return -EINVAL;
 
-       memset(in, 0, sizeof(in));
-
        ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
 
        MLX5_SET(set_roce_address_in, in, roce_address_index, index);
        MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
-       memset(out, 0, sizeof(out));
        return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
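The "= {0}" initializers replace the later memset() calls: for these fixed-size u32 arrays the zeroing is guaranteed at the point of declaration, and the command buffers can no longer be used before they are cleared. In miniature:

        /* before */
        u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
        memset(in, 0, sizeof(in));

        /* after: zeroed where it is declared */
        u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};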
 
@@ -752,8 +747,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
                                     &props->active_width);
        if (err)
                goto out;
-       err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
-                                        port);
+       err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
        if (err)
                goto out;
 
@@ -1850,6 +1844,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                                           int domain)
 {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_ib_flow_handler *handler = NULL;
        struct mlx5_flow_destination *dst = NULL;
        struct mlx5_ib_flow_prio *ft_prio;
@@ -1876,7 +1871,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        }
 
        dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+       if (mqp->flags & MLX5_IB_QP_RSS)
+               dst->tir_num = mqp->rss_qp.tirn;
+       else
+               dst->tir_num = mqp->raw_packet_qp.rq.tirn;
 
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
                if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
index 40df2cc..996b54e 100644 (file)
@@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 
        addr = addr >> page_shift;
        tmp = (unsigned long)addr;
-       m = find_first_bit(&tmp, sizeof(tmp));
+       m = find_first_bit(&tmp, BITS_PER_LONG);
        skip = 1 << m;
        mask = skip - 1;
        i = 0;
@@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
                for (k = 0; k < len; k++) {
                        if (!(i & mask)) {
                                tmp = (unsigned long)pfn;
-                               m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp)));
+                               m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
                                skip = 1 << m;
                                mask = skip - 1;
                                base = pfn;
@@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
                        } else {
                                if (base + p != pfn) {
                                        tmp = (unsigned long)p;
-                                       m = find_first_bit(&tmp, sizeof(tmp));
+                                       m = find_first_bit(&tmp, BITS_PER_LONG);
                                        skip = 1 << m;
                                        mask = skip - 1;
                                        base = pfn;
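find_first_bit() takes the search length in bits, not bytes; sizeof(tmp) is 8 on a 64-bit build, so the scan covered only bits 0-7 and any value whose lowest set bit lay above bit 7 had its alignment misreported, corrupting the page-contiguity computation. BITS_PER_LONG searches the whole word. The computation in isolation (assumes a non-zero value, as the surrounding code does):

        unsigned long tmp = (unsigned long)addr;
        unsigned long m = find_first_bit(&tmp, BITS_PER_LONG);
        unsigned long skip = 1UL << m;          /* alignment granule */
        unsigned long mask = skip - 1;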
index 372385d..67cc741 100644 (file)
@@ -402,6 +402,7 @@ enum mlx5_ib_qp_flags {
        /* QP uses 1 as its source QP number */
        MLX5_IB_QP_SQPN_QP1                     = 1 << 6,
        MLX5_IB_QP_CAP_SCATTER_FCS              = 1 << 7,
+       MLX5_IB_QP_RSS                          = 1 << 8,
 };
 
 struct mlx5_umr_wr {
@@ -504,7 +505,7 @@ struct mlx5_ib_mr {
        int                     umred;
        int                     npages;
        struct mlx5_ib_dev     *dev;
-       struct mlx5_create_mkey_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
        struct mlx5_core_sig_ctx    *sig;
        int                     live;
        void                    *descs_alloc;
index 4b02130..6f7e347 100644 (file)
@@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context)
                return;
        }
 
-       if (mr->out.hdr.status) {
-               mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
-                            mr->out.hdr.status,
-                            be32_to_cpu(mr->out.hdr.syndrome));
-               kfree(mr);
-               dev->fill_delay = 1;
-               mod_timer(&dev->delay_timer, jiffies + HZ);
-               return;
-       }
-
        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
        key = dev->mdev->priv.mkey_key++;
        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-       mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+       mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 
        cache->last_add = jiffies;
 
@@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
-       struct mlx5_create_mkey_mbox_in *in;
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
+       void *mkc;
+       u32 *in;
        int err = 0;
        int i;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
 
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        for (i = 0; i < num; i++) {
                if (ent->pending >= MAX_PENDING_REG_MR) {
                        err = -EAGAIN;
@@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                mr->order = ent->order;
                mr->umred = 1;
                mr->dev = dev;
-               in->seg.status = MLX5_MKEY_STATUS_FREE;
-               in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
-               in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-               in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
-               in->seg.log2_page_size = 12;
+
+               MLX5_SET(mkc, mkc, free, 1);
+               MLX5_SET(mkc, mkc, umr_en, 1);
+               MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+               MLX5_SET(mkc, mkc, qpn, 0xffffff);
+               MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
+               MLX5_SET(mkc, mkc, log_page_size, 12);
 
                spin_lock_irq(&ent->lock);
                ent->pending++;
                spin_unlock_irq(&ent->lock);
-               err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
-                                           sizeof(*in), reg_mr_callback,
-                                           mr, &mr->out);
+               err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
+                                              in, inlen,
+                                              mr->out, sizeof(mr->out),
+                                              reg_mr_callback, mr);
                if (err) {
                        spin_lock_irq(&ent->lock);
                        ent->pending--;
@@ -670,30 +667,38 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_core_dev *mdev = dev->mdev;
-       struct mlx5_create_mkey_mbox_in *in;
-       struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
+       void *mkc;
+       u32 *in;
        int err;
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }
 
-       seg = &in->seg;
-       seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
-       seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
-       seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-       seg->start_addr = 0;
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+       MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+       MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+       MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+       MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+       MLX5_SET(mkc, mkc, lr, 1);
 
-       err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
-                                   NULL);
+       MLX5_SET(mkc, mkc, length64, 1);
+       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET64(mkc, mkc, start_addr, 0);
+
+       err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_in;
 
@@ -1063,9 +1068,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
                                     int page_shift, int access_flags)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
+       __be64 *pas;
+       void *mkc;
        int inlen;
+       u32 *in;
        int err;
        bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
 
@@ -1073,31 +1080,41 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+       inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+               sizeof(*pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
-       mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+       pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+       mlx5_ib_populate_pas(dev, umem, page_shift, pas,
                             pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
-       /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+       /* The pg_access bit allows setting the access flags
         * in the page list submitted with the command. */
-       in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
-       in->seg.flags = convert_access(access_flags) |
-               MLX5_ACCESS_MODE_MTT;
-       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
-       in->seg.start_addr = cpu_to_be64(virt_addr);
-       in->seg.len = cpu_to_be64(length);
-       in->seg.bsfs_octo_size = 0;
-       in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
-       in->seg.log2_page_size = page_shift;
-       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-       in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
-                                                        1 << page_shift));
-       err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
-                                   NULL, NULL);
+       MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
+
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+       MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+       MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+       MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+       MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+       MLX5_SET(mkc, mkc, lr, 1);
+
+       MLX5_SET64(mkc, mkc, start_addr, virt_addr);
+       MLX5_SET64(mkc, mkc, len, length);
+       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+       MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+       MLX5_SET(mkc, mkc, translations_octword_size,
+                get_octo_len(virt_addr, length, 1 << page_shift));
+       MLX5_SET(mkc, mkc, log_page_size, page_shift);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+                get_octo_len(virt_addr, length, 1 << page_shift));
+
+       err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
@@ -1523,30 +1540,32 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                               u32 max_num_sg)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_create_mkey_mbox_in *in;
-       struct mlx5_ib_mr *mr;
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        int ndescs = ALIGN(max_num_sg, 4);
+       struct mlx5_ib_mr *mr;
+       void *mkc;
+       u32 *in;
        int err;
 
        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }
 
-       in->seg.status = MLX5_MKEY_STATUS_FREE;
-       in->seg.xlt_oct_size = cpu_to_be32(ndescs);
-       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 
        if (mr_type == IB_MR_TYPE_MEM_REG) {
-               mr->access_mode = MLX5_ACCESS_MODE_MTT;
-               in->seg.log2_page_size = PAGE_SHIFT;
-
+               mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+               MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
                err = mlx5_alloc_priv_descs(pd->device, mr,
                                            ndescs, sizeof(u64));
                if (err)
@@ -1555,7 +1574,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                mr->desc_size = sizeof(u64);
                mr->max_descs = ndescs;
        } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
-               mr->access_mode = MLX5_ACCESS_MODE_KLM;
+               mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
 
                err = mlx5_alloc_priv_descs(pd->device, mr,
                                            ndescs, sizeof(struct mlx5_klm));
@@ -1566,9 +1585,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
        } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
                u32 psv_index[2];
 
-               in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
-                                                          MLX5_MKEY_BSF_EN);
-               in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+               MLX5_SET(mkc, mkc, bsf_en, 1);
+               MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
                if (!mr->sig) {
                        err = -ENOMEM;
@@ -1581,7 +1599,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                if (err)
                        goto err_free_sig;
 
-               mr->access_mode = MLX5_ACCESS_MODE_KLM;
+               mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
                mr->sig->psv_memory.psv_idx = psv_index[0];
                mr->sig->psv_wire.psv_idx = psv_index[1];
 
@@ -1595,9 +1613,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                goto err_free_in;
        }
 
-       in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
-       err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
-                                   NULL, NULL, NULL);
+       MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+
+       err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_destroy_psv;
 
@@ -1633,8 +1652,10 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
                               struct ib_udata *udata)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_create_mkey_mbox_in *in = NULL;
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mw *mw = NULL;
+       u32 *in = NULL;
+       void *mkc;
        int ndescs;
        int err;
        struct mlx5_ib_alloc_mw req = {};
@@ -1658,23 +1679,24 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
        ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
 
        mw = kzalloc(sizeof(*mw), GFP_KERNEL);
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
+       in = kzalloc(inlen, GFP_KERNEL);
        if (!mw || !in) {
                err = -ENOMEM;
                goto free;
        }
 
-       in->seg.status = MLX5_MKEY_STATUS_FREE;
-       in->seg.xlt_oct_size = cpu_to_be32(ndescs);
-       in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
-       in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
-               MLX5_PERM_LOCAL_READ;
-       if (type == IB_MW_TYPE_2)
-               in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
-       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-
-       err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
-                                   NULL, NULL, NULL);
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+       MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
+       MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+       err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
        if (err)
                goto free;
 
@@ -1811,7 +1833,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                                   mr->desc_size * mr->max_descs,
                                   DMA_TO_DEVICE);
 
-       if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+       if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
        else
                n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
index 0dd7d93..9529b46 100644 (file)
@@ -726,7 +726,7 @@ err_umem:
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                          struct mlx5_ib_qp *qp, struct ib_udata *udata,
                          struct ib_qp_init_attr *attr,
-                         struct mlx5_create_qp_mbox_in **in,
+                         u32 **in,
                          struct mlx5_ib_create_qp_resp *resp, int *inlen,
                          struct mlx5_ib_qp_base *base)
 {
@@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        u32 offset = 0;
        int uuarn;
        int ncont = 0;
+       __be64 *pas;
+       void *qpc;
        int err;
 
        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                ubuffer->umem = NULL;
        }
 
-       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+       *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
        *in = mlx5_vzalloc(*inlen);
        if (!*in) {
                err = -ENOMEM;
                goto err_umem;
        }
+
+       pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
        if (ubuffer->umem)
-               mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
-                                    (*in)->pas, 0);
-       (*in)->ctx.log_pg_sz_remote_qpn =
-               cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
-       (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+               mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
+
+       qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+
+       MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(qpc, qpc, page_offset, offset);
 
-       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+       MLX5_SET(qpc, qpc, uar_page, uar_index);
        resp->uuar_index = uuarn;
        qp->uuarn = uuarn;
 
@@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
-                           struct mlx5_create_qp_mbox_in **in, int *inlen,
+                           u32 **in, int *inlen,
                            struct mlx5_ib_qp_base *base)
 {
        enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
        struct mlx5_uuar_info *uuari;
        int uar_index;
+       void *qpc;
        int uuarn;
        int err;
 
@@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        }
 
        qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
-       *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+       *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
        *in = mlx5_vzalloc(*inlen);
        if (!*in) {
                err = -ENOMEM;
                goto err_buf;
        }
-       (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-       (*in)->ctx.log_pg_sz_remote_qpn =
-               cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+
+       qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+       MLX5_SET(qpc, qpc, uar_page, uar_index);
+       MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
        /* Set "fast registration enabled" for all kernel QPs */
-       (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
-       (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+       MLX5_SET(qpc, qpc, fre, 1);
+       MLX5_SET(qpc, qpc, rlky, 1);
 
        if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
-               (*in)->ctx.deth_sqpn = cpu_to_be32(1);
+               MLX5_SET(qpc, qpc, deth_sqpn, 1);
                qp->flags |= MLX5_IB_QP_SQPN_QP1;
        }
 
-       mlx5_fill_page_array(&qp->buf, (*in)->pas);
+       mlx5_fill_page_array(&qp->buf,
+                            (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
 
        err = mlx5_db_alloc(dev->mdev, &qp->db);
        if (err) {
@@ -974,15 +985,15 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
        free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
 }
 
-static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
 {
        if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
            (attr->qp_type == IB_QPT_XRC_INI))
-               return cpu_to_be32(MLX5_SRQ_RQ);
+               return MLX5_SRQ_RQ;
        else if (!qp->has_rq)
-               return cpu_to_be32(MLX5_ZERO_LEN_RQ);
+               return MLX5_ZERO_LEN_RQ;
        else
-               return cpu_to_be32(MLX5_NON_ZERO_RQ);
+               return MLX5_NON_ZERO_RQ;
 }
 
 static int is_connected(enum ib_qp_type qp_type)
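get_rx_type() now returns a plain host-order value because its only consumer stores it via MLX5_SET(qpc, qpc, rq_type, ...), and MLX5_SET performs the byte-order conversion itself; keeping the cpu_to_be32() would have byte-swapped twice. Usage, as in the converted code further below:

        MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));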
@@ -996,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type)
 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
                                    struct mlx5_ib_sq *sq, u32 tdn)
 {
-       u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+       u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
        void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(tisc, tisc, transport_domain, tdn);
-
        return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
 }
 
@@ -1191,7 +1199,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 }
 
 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               struct mlx5_create_qp_mbox_in *in,
+                               u32 *in,
                                struct ib_pd *pd)
 {
        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@ -1449,6 +1457,7 @@ create_tir:
        kvfree(in);
        /* qpn is reserved for that QP */
        qp->trans_qp.base.mqp.qpn = 0;
+       qp->flags |= MLX5_IB_QP_RSS;
        return 0;
 
 err:
@@ -1461,18 +1470,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                            struct ib_udata *udata, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_resources *devr = &dev->devr;
+       int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
        struct mlx5_core_dev *mdev = dev->mdev;
-       struct mlx5_ib_qp_base *base;
        struct mlx5_ib_create_qp_resp resp;
-       struct mlx5_create_qp_mbox_in *in;
-       struct mlx5_ib_create_qp ucmd;
        struct mlx5_ib_cq *send_cq;
        struct mlx5_ib_cq *recv_cq;
        unsigned long flags;
-       int inlen = sizeof(*in);
-       int err;
        u32 uidx = MLX5_IB_DEFAULT_UIDX;
+       struct mlx5_ib_create_qp ucmd;
+       struct mlx5_ib_qp_base *base;
        void *qpc;
+       u32 *in;
+       int err;
 
        base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
               &qp->raw_packet_qp.rq.base :
@@ -1600,7 +1609,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                if (err)
                        return err;
        } else {
-               in = mlx5_vzalloc(sizeof(*in));
+               in = mlx5_vzalloc(inlen);
                if (!in)
                        return -ENOMEM;
 
@@ -1610,26 +1619,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (is_sqp(init_attr->qp_type))
                qp->port = init_attr->port_num;
 
-       in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
-                                   MLX5_QP_PM_MIGRATED << 11);
+       qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+       MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+       MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 
        if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
-               in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+               MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
        else
-               in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+               MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
 
        if (qp->wq_sig)
-               in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+               MLX5_SET(qpc, qpc, wq_signature, 1);
 
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
-               in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+               MLX5_SET(qpc, qpc, block_lb_mc, 1);
 
        if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
-               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+               MLX5_SET(qpc, qpc, cd_master, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
-               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+               MLX5_SET(qpc, qpc, cd_slave_send, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
-               in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+               MLX5_SET(qpc, qpc, cd_slave_receive, 1);
 
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                int rcqe_sz;
@@ -1639,71 +1651,68 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
 
                if (rcqe_sz == 128)
-                       in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+                       MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
                else
-                       in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+                       MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
 
                if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
                        if (scqe_sz == 128)
-                               in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+                               MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
                        else
-                               in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+                               MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
                }
        }
 
        if (qp->rq.wqe_cnt) {
-               in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
-               in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+               MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+               MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
        }
 
-       in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+       MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
 
        if (qp->sq.wqe_cnt)
-               in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+               MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
        else
-               in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+               MLX5_SET(qpc, qpc, no_sq, 1);
 
        /* Set default resources */
        switch (init_attr->qp_type) {
        case IB_QPT_XRC_TGT:
-               in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-               in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-               in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
-               in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+               MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+               MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+               MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+               MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
                break;
        case IB_QPT_XRC_INI:
-               in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-               in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
-               in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+               MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+               MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+               MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
                break;
        default:
                if (init_attr->srq) {
-                       in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
-                       in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+                       MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+                       MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
                } else {
-                       in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
-                       in->ctx.rq_type_srqn |=
-                               cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
+                       MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+                       MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
                }
        }
 
        if (init_attr->send_cq)
-               in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+               MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
 
        if (init_attr->recv_cq)
-               in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+               MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
 
-       in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+       MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
 
-       if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
-               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
-               /* 0xffffff means we ask to work with cqe version 0 */
+       /* 0xffffff means we ask to work with cqe version 0 */
+       if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
                MLX5_SET(qpc, qpc, user_index, uidx);
-       }
+
        /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an ipoib qp */
        if (init_attr->qp_type == IB_QPT_UD &&
            (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
-               qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
                qp->flags |= MLX5_IB_QP_LSO;
        }
@@ -1860,7 +1869,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
-       struct mlx5_modify_qp_mbox_in *in;
        unsigned long flags;
        int err;
 
@@ -1873,16 +1881,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
               &qp->raw_packet_qp.rq.base :
               &qp->trans_qp.base;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
-       if (!in)
-               return;
-
        if (qp->state != IB_QPS_RESET) {
                if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
                        mlx5_ib_qp_disable_pagefaults(qp);
                        err = mlx5_core_qp_modify(dev->mdev,
-                                                 MLX5_CMD_OP_2RST_QP, in, 0,
-                                                 &base->mqp);
+                                                 MLX5_CMD_OP_2RST_QP, 0,
+                                                 NULL, &base->mqp);
                } else {
                        err = modify_raw_packet_qp(dev, qp,
                                                   MLX5_CMD_OP_2RST_QP);
@@ -1924,8 +1928,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
                                     base->mqp.qpn);
        }
 
-       kfree(in);
-
        if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
        else if (qp->create_type == MLX5_QP_USER)
@@ -2511,7 +2513,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
-       struct mlx5_modify_qp_mbox_in *in;
        struct mlx5_ib_pd *pd;
        enum mlx5_qp_state mlx5_cur, mlx5_new;
        enum mlx5_qp_optpar optpar;
@@ -2520,11 +2521,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        int err;
        u16 op;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
-       if (!in)
+       context = kzalloc(sizeof(*context), GFP_KERNEL);
+       if (!context)
                return -ENOMEM;
 
-       context = &in->ctx;
        err = to_mlx5_st(ibqp->qp_type);
        if (err < 0) {
                mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
@@ -2689,12 +2689,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
-       in->optparam = cpu_to_be32(optpar);
 
        if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
                err = modify_raw_packet_qp(dev, qp, op);
        else
-               err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+               err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
                                          &base->mqp);
        if (err)
                goto out;
@@ -2735,7 +2734,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        }
 
 out:
-       kfree(in);
+       kfree(context);
        return err;
 }
 
@@ -2968,7 +2967,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
 
        memset(umr, 0, sizeof(*umr));
 
-       if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+       if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                /* KLMs take twice the size of MTTs */
                ndescs *= 2;
 
@@ -3111,9 +3110,9 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
 
        memset(seg, 0, sizeof(*seg));
 
-       if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+       if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
                seg->log2_page_size = ilog2(mr->ibmr.page_size);
-       else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+       else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                /* KLMs take twice the size of MTTs */
                ndescs *= 2;
 
@@ -3454,7 +3453,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
        memset(seg, 0, sizeof(*seg));
 
        seg->flags = get_umr_flags(wr->access_flags) |
-                                  MLX5_ACCESS_MODE_KLM;
+                                  MLX5_MKC_ACCESS_MODE_KLMS;
        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
                                    MLX5_MKEY_BSF_EN | pdn);
@@ -3658,12 +3657,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
                     struct ib_send_wr *wr, unsigned *idx,
                     int *size, int nreq)
 {
-       int err = 0;
-
-       if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
-               err = -ENOMEM;
-               return err;
-       }
+       if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+               return -ENOMEM;
 
        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
        *seg = mlx5_get_send_wqe(qp, *idx);
@@ -3679,7 +3674,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
        *seg += sizeof(**ctrl);
        *size = sizeof(**ctrl) / 16;
 
-       return err;
+       return 0;
 }
 
 static void finish_wqe(struct mlx5_ib_qp *qp,
@@ -3758,7 +3753,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                num_sge = wr->num_sge;
                if (unlikely(num_sge > qp->sq.max_gs)) {
                        mlx5_ib_warn(dev, "\n");
-                       err = -ENOMEM;
+                       err = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }
@@ -4320,21 +4315,24 @@ static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
 static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                         struct ib_qp_attr *qp_attr)
 {
-       struct mlx5_query_qp_mbox_out *outb;
+       int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
        struct mlx5_qp_context *context;
        int mlx5_state;
+       u32 *outb;
        int err = 0;
 
-       outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+       outb = kzalloc(outlen, GFP_KERNEL);
        if (!outb)
                return -ENOMEM;
 
-       context = &outb->ctx;
        err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
-                                sizeof(*outb));
+                                outlen);
        if (err)
                goto out;
 
+       /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+       context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
+
        mlx5_state = be32_to_cpu(context->flags) >> 28;
 
        qp->state                    = to_ib_qp_state(mlx5_state);
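
The FIXME above points at the mlx5_ifc accessor macros. A minimal sketch of
that direction, reading the QP state through MLX5_GET() instead of casting to
struct mlx5_qp_context; the query_qp_out/qpc field names are taken from
mlx5_ifc.h and should be treated as assumptions, not part of this patch:

#include <linux/mlx5/device.h>
#include <linux/mlx5/mlx5_ifc.h>

/* Decode the QP state from a QUERY_QP output mailbox. */
static u8 qp_state_from_query_out(u32 *outb)
{
	void *qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);

	return MLX5_GET(qpc, qpc, state);
}
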
index 16740dc..67fc0b6 100644
@@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
        attr->max_srq =
                (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
                OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
-       attr->max_send_sge = ((rsp->max_write_send_sge &
+       attr->max_send_sge = ((rsp->max_recv_send_sge &
                               OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
                              OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
-       attr->max_recv_sge = (rsp->max_write_send_sge &
-                             OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
-           OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+       attr->max_recv_sge = (rsp->max_recv_send_sge &
+                             OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >>
+           OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT;
        attr->max_srq_sge = (rsp->max_srq_rqe_sge &
                              OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
-       attr->max_rdma_sge = (rsp->max_write_send_sge &
-                             OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
-           OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
+       attr->max_rdma_sge = (rsp->max_wr_rd_sge &
+                             OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >>
+           OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT;
        attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
                                OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
            OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
index 0efc966..37df448 100644
@@ -554,9 +554,9 @@ enum {
        OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK               = 0x18,
        OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT         = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK          = 0xFFFF,
-       OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT        = 16,
-       OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK         = 0xFFFF <<
-                               OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
+       OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16,
+       OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK          = 0xFFFF <<
+                               OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT,
 
        OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT       = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK        = 0xFFFF,
@@ -612,6 +612,8 @@ enum {
        OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET         = 0,
        OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK           = 0xFFFF <<
                                OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+       OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT           = 0,
+       OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK            = 0xFFFF,
 };
 
 struct ocrdma_mbx_query_config {
@@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config {
        struct ocrdma_mbx_rsp rsp;
        u32 qp_srq_cq_ird_ord;
        u32 max_pd_ca_ack_delay;
-       u32 max_write_send_sge;
+       u32 max_recv_send_sge;
        u32 max_ird_ord_per_qp;
        u32 max_shared_ird_ord;
        u32 max_mr;
@@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config {
        u32 max_wqes_rqes_per_q;
        u32 max_cq_cqes_per_cq;
        u32 max_srq_rqe_sge;
+       u32 max_wr_rd_sge;
+       u32 ird_pgsz_num_pages;
 };
 
 struct ocrdma_fw_ver_rsp {
index b1a3d91..0aa8547 100644
@@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
                                        IB_DEVICE_SYS_IMAGE_GUID |
                                        IB_DEVICE_LOCAL_DMA_LKEY |
                                        IB_DEVICE_MEM_MGT_EXTENSIONS;
-       attr->max_sge = dev->attr.max_send_sge;
-       attr->max_sge_rd = attr->max_sge;
+       attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+       attr->max_sge_rd = dev->attr.max_rdma_sge;
        attr->max_cq = dev->attr.max_cq;
        attr->max_cqe = dev->attr.max_cqe;
        attr->max_mr = dev->attr.max_mr;
index 5e75b43..5bad8e3 100644
@@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
 DEBUGFS_FILE(ctx_stats)
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+       __acquires(RCU)
 {
        struct qib_qp_iter *iter;
        loff_t n = *pos;
 
-       rcu_read_lock();
        iter = qib_qp_iter_init(s->private);
+
+       /* stop calls rcu_read_unlock */
+       rcu_read_lock();
+
        if (!iter)
                return NULL;
 
-       while (n--) {
+       do {
                if (qib_qp_iter_next(iter)) {
                        kfree(iter);
                        return NULL;
                }
-       }
+       } while (n--);
 
        return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
                                   loff_t *pos)
+       __must_hold(RCU)
 {
        struct qib_qp_iter *iter = iter_ptr;
 
@@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+       __releases(RCU)
 {
        rcu_read_unlock();
 }
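
The asymmetry above is intentional: seq_file guarantees that ->stop() runs
after every ->start(), so the rcu_read_lock() taken in start is always paired
with the unlock in stop, and the __acquires()/__releases() annotations let
sparse check that cross-function pairing. A stripped-down sketch of the
pattern (illustrative only; first_item() is a hypothetical lookup helper):

#include <linux/seq_file.h>
#include <linux/rcupdate.h>

static void *ex_seq_start(struct seq_file *s, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();		/* held across the whole iteration */
	return first_item(*pos);	/* hypothetical: locate entry *pos */
}

static void ex_seq_stop(struct seq_file *s, void *v)
	__releases(RCU)
{
	rcu_read_unlock();		/* seq_file always calls ->stop() */
}
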
index fcdf379..c3edc03 100644
@@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 
        pos = *ppos;
 
-       if (pos != 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (count != sizeof(struct qib_flash)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       tmp = kmalloc(count, GFP_KERNEL);
-       if (!tmp) {
-               ret = -ENOMEM;
-               goto bail;
-       }
+       if (pos != 0 || count != sizeof(struct qib_flash))
+               return -EINVAL;
 
-       if (copy_from_user(tmp, buf, count)) {
-               ret = -EFAULT;
-               goto bail_tmp;
-       }
+       tmp = memdup_user(buf, count);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
 
        dd = private2dd(file);
        if (qib_eeprom_write(dd, pos, tmp, count)) {
@@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 
 bail_tmp:
        kfree(tmp);
-
-bail:
        return ret;
 }
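
memdup_user() folds the kmalloc()/copy_from_user()/unwind sequence removed
above into a single call: it returns ERR_PTR(-ENOMEM) if the allocation fails
and ERR_PTR(-EFAULT) if the copy faults. The resulting idiom, as a minimal
sketch (ex_write() is a made-up name, not from this patch):

#include <linux/string.h>	/* memdup_user() */
#include <linux/err.h>
#include <linux/slab.h>

static ssize_t ex_write(const char __user *buf, size_t count)
{
	void *tmp = memdup_user(buf, count);	/* kmalloc + copy_from_user */

	if (IS_ERR(tmp))
		return PTR_ERR(tmp);		/* -ENOMEM or -EFAULT */
	/* ... consume tmp ... */
	kfree(tmp);
	return count;
}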
 
index 9cc0aae..f9b8cd2 100644
@@ -573,10 +573,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
                return NULL;
 
        iter->dev = dev;
-       if (qib_qp_iter_next(iter)) {
-               kfree(iter);
-               return NULL;
-       }
 
        return iter;
 }
index c229b9f..0a89a95 100644
@@ -664,7 +664,8 @@ static int __init usnic_ib_init(void)
                return err;
        }
 
-       if (pci_register_driver(&usnic_ib_pci_driver)) {
+       err = pci_register_driver(&usnic_ib_pci_driver);
+       if (err) {
                usnic_err("Unable to register with PCI\n");
                goto out_umem_fini;
        }
index bdb540f..870b4f2 100644
@@ -873,7 +873,8 @@ bail_qpn:
        free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
 
 bail_rq_wq:
-       vfree(qp->r_rq.wq);
+       if (!qp->ip)
+               vfree(qp->r_rq.wq);
 
 bail_driver_priv:
        rdi->driver_f.qp_priv_free(rdi, qp);
index 4f7d9b4..9dbfcc0 100644
@@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
+struct ipoib_path *__path_find(struct net_device *dev, void *gid);
 void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
 int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
index 951d9ab..4ad297d 100644
@@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
        }
 }
 
+#define QPN_AND_OPTIONS_OFFSET 4
+
 static void ipoib_cm_tx_start(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
        struct ipoib_neigh *neigh;
        struct ipoib_cm_tx *p;
        unsigned long flags;
+       struct ipoib_path *path;
        int ret;
 
        struct ib_sa_path_rec pathrec;
@@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                p = list_entry(priv->cm.start_list.next, typeof(*p), list);
                list_del_init(&p->list);
                neigh = p->neigh;
+
                qpn = IPOIB_QPN(neigh->daddr);
+               /*
+                * As long as the search is done under these two locks,
+                * the path's existence guarantees that it is still valid.
+                */
+               path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+               if (!path) {
+                       pr_info("%s: ignoring invalid path %pI6\n",
+                               __func__,
+                               neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+                       goto free_neigh;
+               }
                memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
 
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
                spin_lock_irqsave(&priv->lock, flags);
 
                if (ret) {
+free_neigh:
                        neigh = p->neigh;
                        if (neigh) {
                                neigh->cm = NULL;
index 74bcaa0..cc1c1b0 100644
@@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
        return -EINVAL;
 }
 
-static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
+struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct rb_node *n = priv->path_tree.rb_node;
index ba6be06..cae9bbc 100644
@@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn)
        INIT_LIST_HEAD(&isert_conn->node);
        init_completion(&isert_conn->login_comp);
        init_completion(&isert_conn->login_req_comp);
+       init_waitqueue_head(&isert_conn->rem_wait);
        kref_init(&isert_conn->kref);
        mutex_init(&isert_conn->mutex);
        INIT_WORK(&isert_conn->release_work, isert_release_work);
@@ -448,7 +449,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn,
 
        isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
        if (!isert_conn->login_rsp_buf) {
-               isert_err("Unable to allocate isert_conn->login_rspbuf\n");
+               ret = -ENOMEM;
                goto out_unmap_login_req_buf;
        }
 
@@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn)
        BUG_ON(!device);
 
        isert_free_rx_descriptors(isert_conn);
-       if (isert_conn->cm_id)
+       if (isert_conn->cm_id &&
+           !isert_conn->dev_removed)
                rdma_destroy_id(isert_conn->cm_id);
 
        if (isert_conn->qp) {
@@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn)
 
        isert_device_put(device);
 
-       kfree(isert_conn);
+       if (isert_conn->dev_removed)
+               wake_up_interruptible(&isert_conn->rem_wait);
+       else
+               kfree(isert_conn);
 }
 
 static void
@@ -753,6 +758,7 @@ static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
        struct isert_np *isert_np = cma_id->context;
+       struct isert_conn *isert_conn;
        int ret = 0;
 
        isert_info("%s (%d): status %d id %p np %p\n",
@@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                break;
        case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
        case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
-       case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
                ret = isert_disconnected_handler(cma_id, event->event);
                break;
+       case RDMA_CM_EVENT_DEVICE_REMOVAL:
+               isert_conn = cma_id->qp->qp_context;
+               isert_conn->dev_removed = true;
+               isert_disconnected_handler(cma_id, event->event);
+               wait_event_interruptible(isert_conn->rem_wait,
+                                        isert_conn->state == ISER_CONN_DOWN);
+               kfree(isert_conn);
+               /*
+                * Return non-zero from the callback so that the RDMA CM
+                * core destroys the cm_id for us.
+                */
+               return 1;
        case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
        case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
        case RDMA_CM_EVENT_CONNECT_ERROR:
index fc791ef..c02ada5 100644
@@ -158,6 +158,8 @@ struct isert_conn {
        struct work_struct      release_work;
        bool                    logout_posted;
        bool                    snd_w_inv;
+       wait_queue_head_t       rem_wait;
+       bool                    dev_removed;
 };
 
 #define ISERT_MAX_CQ 64
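
The two new fields implement a handshake between the DEVICE_REMOVAL handler
and the normal release path: removal marks the connection, waits on rem_wait
until release has finished tearing it down, and only then frees it. A
bare-bones sketch of that handshake (simplified; ex_* names are placeholders
and the connection state is reduced to a single flag):

#include <linux/wait.h>
#include <linux/slab.h>

struct ex_conn {
	wait_queue_head_t	rem_wait;
	bool			dev_removed;
	bool			down;		/* set once teardown completes */
};

/* Release path: if a removal is in flight, signal it instead of freeing. */
static void ex_release(struct ex_conn *c)
{
	c->down = true;
	if (c->dev_removed)
		wake_up_interruptible(&c->rem_wait);
	else
		kfree(c);
}

/* DEVICE_REMOVAL path: wait for release to finish, then free. */
static void ex_device_removal(struct ex_conn *c)
{
	c->dev_removed = true;
	wait_event_interruptible(c->rem_wait, c->down);
	kfree(c);
}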
index dfa23b0..883bbfe 100644
@@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport)
        if (ret)
                goto err_query_port;
 
+       snprintf(sport->port_guid, sizeof(sport->port_guid),
+               "0x%016llx%016llx",
+               be64_to_cpu(sport->gid.global.subnet_prefix),
+               be64_to_cpu(sport->gid.global.interface_id));
+
        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof(reg_req));
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device)
                               sdev->device->name, i);
                        goto err_ring;
                }
-               snprintf(sport->port_guid, sizeof(sport->port_guid),
-                       "0x%016llx%016llx",
-                       be64_to_cpu(sport->gid.global.subnet_prefix),
-                       be64_to_cpu(sport->gid.global.interface_id));
        }
 
        spin_lock(&srpt_dev_lock);
index 7d61439..0c07e10 100644
@@ -376,7 +376,7 @@ static int tegra_kbc_start(struct tegra_kbc *kbc)
        /* Reset the KBC controller to clear all previous status.*/
        reset_control_assert(kbc->rst);
        udelay(100);
-       reset_control_assert(kbc->rst);
+       reset_control_deassert(kbc->rst);
        udelay(100);
 
        tegra_kbc_config_pins(kbc);
index faa295e..c83bce8 100644
@@ -553,7 +553,6 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
                goto free_struct_buff;
 
        reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS);
-       map_offset = 0;
        for (i = 0; i < rdesc->num_registers; i++) {
                struct rmi_register_desc_item *item = &rdesc->registers[i];
                int reg_size = struct_buf[offset];
@@ -576,6 +575,8 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr,
                item->reg = reg;
                item->reg_size = reg_size;
 
+               map_offset = 0;
+
                do {
                        for (b = 0; b < 7; b++) {
                                if (struct_buf[offset] & (0x1 << b))
index b4d3408..405252a 100644
@@ -1305,6 +1305,7 @@ static int __init i8042_create_aux_port(int idx)
        serio->write            = i8042_aux_write;
        serio->start            = i8042_start;
        serio->stop             = i8042_stop;
+       serio->ps2_cmd_mutex    = &i8042_mutex;
        serio->port_data        = port;
        serio->dev.parent       = &i8042_platform_device->dev;
        if (idx < 0) {
index a61b215..1ce3ecb 100644
@@ -1473,7 +1473,6 @@ static int ads7846_remove(struct spi_device *spi)
 
        ads784x_hwmon_unregister(spi, ts);
 
-       regulator_disable(ts->reg);
        regulator_put(ts->reg);
 
        if (!ts->get_pendown_state) {
index 7379fe1..b2744a6 100644
@@ -464,7 +464,7 @@ static int silead_ts_probe(struct i2c_client *client,
                return -ENODEV;
 
        /* Power GPIO pin */
-       data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+       data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
        if (IS_ERR(data->gpio_power)) {
                if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
                        dev_err(dev, "Shutdown GPIO request failed\n");
index ce80117..641e887 100644
@@ -879,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
         * We may have concurrent producers, so we need to be careful
         * not to touch any of the shadow cmdq state.
         */
-       queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
+       queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
        dev_err(smmu->dev, "skipping command in error state:\n");
        for (i = 0; i < ARRAY_SIZE(cmd); ++i)
                dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -890,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
                return;
        }
 
-       queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
+       queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -1034,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
                case STRTAB_STE_0_CFG_S2_TRANS:
                        ste_live = true;
                        break;
+               case STRTAB_STE_0_CFG_ABORT:
+                       if (disable_bypass)
+                               break;
                default:
                        BUG(); /* STE corruption */
                }
index 4f49fe2..2db74eb 100644
@@ -686,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = {
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 {
-       int flags, ret;
-       u32 fsr, fsynr, resume;
+       u32 fsr, fsynr;
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -701,34 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;
 
-       if (fsr & FSR_IGN)
-               dev_err_ratelimited(smmu->dev,
-                                   "Unexpected context fault (fsr 0x%x)\n",
-                                   fsr);
-
        fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
-       flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
-
        iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
-       if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
-               ret = IRQ_HANDLED;
-               resume = RESUME_RETRY;
-       } else {
-               dev_err_ratelimited(smmu->dev,
-                   "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-                   iova, fsynr, cfg->cbndx);
-               ret = IRQ_NONE;
-               resume = RESUME_TERMINATE;
-       }
-
-       /* Clear the faulting FSR */
-       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
 
-       /* Retry or terminate any stalled transactions */
-       if (fsr & FSR_SS)
-               writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
+       dev_err_ratelimited(smmu->dev,
+       "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+                           fsr, iova, fsynr, cfg->cbndx);
 
-       return ret;
+       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+       return IRQ_HANDLED;
 }
 
 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -837,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
        }
 
        /* SCTLR */
-       reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+       reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
 #ifdef __BIG_ENDIAN
index 8c61399..def8ca1 100644
@@ -286,12 +286,14 @@ static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
        int prot = IOMMU_READ;
        arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
 
-       if (attr & ARM_V7S_PTE_AP_RDONLY)
+       if (!(attr & ARM_V7S_PTE_AP_RDONLY))
                prot |= IOMMU_WRITE;
        if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
                prot |= IOMMU_MMIO;
        else if (pte & ARM_V7S_ATTR_C)
                prot |= IOMMU_CACHE;
+       if (pte & ARM_V7S_ATTR_XN(lvl))
+               prot |= IOMMU_NOEXEC;
 
        return prot;
 }
index 7ceaba8..36b9c28 100644
@@ -1545,7 +1545,12 @@ static int its_force_quiescent(void __iomem *base)
        u32 val;
 
        val = readl_relaxed(base + GITS_CTLR);
-       if (val & GITS_CTLR_QUIESCENT)
+       /*
+        * The GIC architecture specification requires the ITS to be both
+        * disabled and quiescent; otherwise, writes to GITS_BASER<n> or
+        * GITS_CBASER have UNPREDICTABLE results.
+        */
+       if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE))
                return 0;
 
        /* Disable the generation of all interrupts to this ITS */
index 6fc56c3..ede5672 100644
@@ -667,13 +667,20 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 #endif
 
 #ifdef CONFIG_CPU_PM
+/* Check whether the distributor has a single security state view */
+static bool gic_dist_security_disabled(void)
+{
+       return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS;
+}
+
 static int gic_cpu_pm_notifier(struct notifier_block *self,
                               unsigned long cmd, void *v)
 {
        if (cmd == CPU_PM_EXIT) {
-               gic_enable_redist(true);
+               if (gic_dist_security_disabled())
+                       gic_enable_redist(true);
                gic_cpu_sys_reg_init();
-       } else if (cmd == CPU_PM_ENTER) {
+       } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) {
                gic_write_grpen1(0);
                gic_enable_redist(false);
        }
index c2cab57..390fac5 100644
@@ -769,6 +769,13 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
        int cpu;
        unsigned long flags, map = 0;
 
+       if (unlikely(nr_cpu_ids == 1)) {
+               /* Only one CPU? let's do a self-IPI... */
+               writel_relaxed(2 << 24 | irq,
+                              gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+               return;
+       }
+
        raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
        /* Convert our logical CPU mask into a physical one. */
index c5f33c3..83f4983 100644
@@ -713,9 +713,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
        unsigned long flags;
        int i;
 
-       irq_set_chip_and_handler(virq, &gic_level_irq_controller,
-                                handle_level_irq);
-
        spin_lock_irqsave(&gic_lock, flags);
        gic_map_to_pin(intr, gic_cpu_pin);
        gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
@@ -732,6 +729,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
 {
        if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
                return gic_local_irq_domain_map(d, virq, hw);
+
+       irq_set_chip_and_handler(virq, &gic_level_irq_controller,
+                                handle_level_irq);
+
        return gic_shared_irq_domain_map(d, virq, hw, 0);
 }
 
@@ -771,11 +772,13 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
                        hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i);
 
                        ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq,
-                                                           &gic_edge_irq_controller,
+                                                           &gic_level_irq_controller,
                                                            NULL);
                        if (ret)
                                goto error;
 
+                       irq_set_handler(virq + i, handle_level_irq);
+
                        ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu);
                        if (ret)
                                goto error;
@@ -890,10 +893,17 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
        return;
 }
 
+static void gic_dev_domain_activate(struct irq_domain *domain,
+                                   struct irq_data *d)
+{
+       gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
+}
+
 static struct irq_domain_ops gic_dev_domain_ops = {
        .xlate = gic_dev_domain_xlate,
        .alloc = gic_dev_domain_alloc,
        .free = gic_dev_domain_free,
+       .activate = gic_dev_domain_activate,
 };
 
 static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
index 292991c..e3fa1cd 100644
@@ -284,7 +284,7 @@ __write_ctrl_pciv2(struct fritzcard *fc, struct hdlc_hw *hdlc, u32 channel) {
                                          AVM_HDLC_STATUS_1));
 }
 
-void
+static void
 write_ctrl(struct bchannel *bch, int which) {
        struct fritzcard *fc = bch->hw;
        struct hdlc_hw *hdlc;
@@ -741,7 +741,7 @@ inithdlc(struct fritzcard *fc)
        modehdlc(&fc->bch[1], -1);
 }
 
-void
+static void
 clear_pending_hdlc_ints(struct fritzcard *fc)
 {
        u32 val;
@@ -962,7 +962,7 @@ avm_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
        return err;
 }
 
-int
+static int
 setup_fritz(struct fritzcard *fc)
 {
        u32 val, ver;
index 28543d7..480c2d7 100644
@@ -564,19 +564,19 @@ disable_hwirq(struct hfc_multi *hc)
 #define        MAX_TDM_CHAN 32
 
 
-inline void
+static inline void
 enablepcibridge(struct hfc_multi *c)
 {
        HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */
 }
 
-inline void
+static inline void
 disablepcibridge(struct hfc_multi *c)
 {
        HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */
 }
 
-inline unsigned char
+static inline unsigned char
 readpcibridge(struct hfc_multi *hc, unsigned char address)
 {
        unsigned short cipv;
@@ -604,7 +604,7 @@ readpcibridge(struct hfc_multi *hc, unsigned char address)
        return data;
 }
 
-inline void
+static inline void
 writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data)
 {
        unsigned short cipv;
@@ -634,14 +634,14 @@ writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data)
        outl(datav, hc->pci_iobase);
 }
 
-inline void
+static inline void
 cpld_set_reg(struct hfc_multi *hc, unsigned char reg)
 {
        /* Do data pin read low byte */
        HFC_outb(hc, R_GPIO_OUT1, reg);
 }
 
-inline void
+static inline void
 cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val)
 {
        cpld_set_reg(hc, reg);
@@ -653,7 +653,7 @@ cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val)
        return;
 }
 
-inline unsigned char
+static inline unsigned char
 cpld_read_reg(struct hfc_multi *hc, unsigned char reg)
 {
        unsigned char bytein;
@@ -670,14 +670,14 @@ cpld_read_reg(struct hfc_multi *hc, unsigned char reg)
        return bytein;
 }
 
-inline void
+static inline void
 vpm_write_address(struct hfc_multi *hc, unsigned short addr)
 {
        cpld_write_reg(hc, 0, 0xff & addr);
        cpld_write_reg(hc, 1, 0x01 & (addr >> 8));
 }
 
-inline unsigned short
+static inline unsigned short
 vpm_read_address(struct hfc_multi *c)
 {
        unsigned short addr;
@@ -691,7 +691,7 @@ vpm_read_address(struct hfc_multi *c)
        return addr & 0x1ff;
 }
 
-inline unsigned char
+static inline unsigned char
 vpm_in(struct hfc_multi *c, int which, unsigned short addr)
 {
        unsigned char res;
@@ -712,7 +712,7 @@ vpm_in(struct hfc_multi *c, int which, unsigned short addr)
        return res;
 }
 
-inline void
+static inline void
 vpm_out(struct hfc_multi *c, int which, unsigned short addr,
        unsigned char data)
 {
@@ -1024,7 +1024,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm)
 }
 
 /* This must be called AND hc must be locked irqsave!!! */
-inline void
+static inline void
 plxsd_checksync(struct hfc_multi *hc, int rm)
 {
        if (hc->syncronized) {
index aa9b6c3..8d338ba 100644
@@ -113,7 +113,7 @@ isac_ph_state_bh(struct dchannel *dch)
        pr_debug("%s: TE newstate %x\n", isac->name, dch->state);
 }
 
-void
+static void
 isac_empty_fifo(struct isac_hw *isac, int count)
 {
        u8 *ptr;
index 7416755..3b067ea 100644
@@ -848,7 +848,7 @@ dbusy_timer_handler(struct dchannel *dch)
        }
 }
 
-void initW6692(struct w6692_hw *card)
+static void initW6692(struct w6692_hw *card)
 {
        u8      val;
 
index 978eda8..8a3ba56 100644
@@ -73,7 +73,6 @@ MODULE_DEVICE_TABLE(i2c, ams_id);
 static struct i2c_driver ams_i2c_driver = {
        .driver = {
                .name   = "ams",
-               .owner  = THIS_MODULE,
        },
        .probe          = ams_i2c_probe,
        .remove         = ams_i2c_remove,
index 3024685..96d16fc 100644
@@ -668,7 +668,6 @@ static struct platform_driver wf_pm112_driver = {
        .remove = wf_pm112_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 2f506b9..e88cfb3 100644
@@ -789,7 +789,6 @@ static struct platform_driver wf_pm72_driver = {
        .remove = wf_pm72_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 82fc86a..bdfcb8a 100644
@@ -682,7 +682,6 @@ static struct platform_driver wf_rm31_driver = {
        .remove = wf_rm31_remove,
        .driver = {
                .name = "windfarm",
-               .owner  = THIS_MODULE,
        },
 };
 
index 97c3729..7817d40 100644
@@ -127,6 +127,7 @@ config XGENE_SLIMPRO_MBOX
 config BCM_PDC_MBOX
        tristate "Broadcom PDC Mailbox"
        depends on ARM64 || COMPILE_TEST
+       depends on HAS_DMA
        default ARCH_BCM_IPROC
        help
          Mailbox implementation for the Broadcom PDC ring manager,
index cbe0c1e..c19dd82 100644
@@ -469,7 +469,7 @@ static const struct file_operations pdc_debugfs_stats = {
  * this directory for a SPU.
  * @pdcs: PDC state structure
  */
-void pdc_setup_debugfs(struct pdc_state *pdcs)
+static void pdc_setup_debugfs(struct pdc_state *pdcs)
 {
        char spu_stats_name[16];
 
@@ -485,7 +485,7 @@ void pdc_setup_debugfs(struct pdc_state *pdcs)
                                                  &pdc_debugfs_stats);
 }
 
-void pdc_free_debugfs(void)
+static void pdc_free_debugfs(void)
 {
        if (debugfs_dir && simple_empty(debugfs_dir)) {
                debugfs_remove_recursive(debugfs_dir);
@@ -1191,10 +1191,11 @@ static void pdc_shutdown(struct mbox_chan *chan)
 {
        struct pdc_state *pdcs = chan->con_priv;
 
-       if (pdcs)
-               dev_dbg(&pdcs->pdev->dev,
-                       "Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
+       if (!pdcs)
+               return;
 
+       dev_dbg(&pdcs->pdev->dev,
+               "Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
        pdc_ring_free(pdcs);
 }
 
index 95a4ca6..849ad44 100644
@@ -760,7 +760,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
        if (!d->nr_stripes ||
            d->nr_stripes > INT_MAX ||
            d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
-               pr_err("nr_stripes too large");
+               pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+                       (unsigned)d->nr_stripes);
                return -ENOMEM;
        }
 
@@ -1820,7 +1821,7 @@ static int cache_alloc(struct cache *ca)
        free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
        if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
-           !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+           !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
            !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
            !init_fifo(&ca->free_inc,   free << 2, GFP_KERNEL) ||
@@ -1844,7 +1845,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                                struct block_device *bdev, struct cache *ca)
 {
        char name[BDEVNAME_SIZE];
-       const char *err = NULL;
+       const char *err = NULL; /* must be set for any error case */
        int ret = 0;
 
        memcpy(&ca->sb, sb, sizeof(struct cache_sb));
@@ -1861,8 +1862,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
                ca->discard = CACHE_DISCARD(&ca->sb);
 
        ret = cache_alloc(ca);
-       if (ret != 0)
+       if (ret != 0) {
+               if (ret == -ENOMEM)
+                       err = "cache_alloc(): -ENOMEM";
+               else
+                       err = "cache_alloc(): unknown error";
                goto err;
+       }
 
        if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
                err = "error calling kobject_add";
index 6fff794..13041ee 100644
@@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
 static ssize_t
 location_store(struct mddev *mddev, const char *buf, size_t len)
 {
+       int rv;
 
+       rv = mddev_lock(mddev);
+       if (rv)
+               return rv;
        if (mddev->pers) {
-               if (!mddev->pers->quiesce)
-                       return -EBUSY;
-               if (mddev->recovery || mddev->sync_thread)
-                       return -EBUSY;
+               if (!mddev->pers->quiesce) {
+                       rv = -EBUSY;
+                       goto out;
+               }
+               if (mddev->recovery || mddev->sync_thread) {
+                       rv = -EBUSY;
+                       goto out;
+               }
        }
 
        if (mddev->bitmap || mddev->bitmap_info.file ||
            mddev->bitmap_info.offset) {
                /* bitmap already configured.  Only option is to clear it */
-               if (strncmp(buf, "none", 4) != 0)
-                       return -EBUSY;
+               if (strncmp(buf, "none", 4) != 0) {
+                       rv = -EBUSY;
+                       goto out;
+               }
                if (mddev->pers) {
                        mddev->pers->quiesce(mddev, 1);
                        bitmap_destroy(mddev);
@@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                        /* nothing to be done */;
                else if (strncmp(buf, "file:", 5) == 0) {
                        /* Not supported yet */
-                       return -EINVAL;
+                       rv = -EINVAL;
+                       goto out;
                } else {
-                       int rv;
                        if (buf[0] == '+')
                                rv = kstrtoll(buf+1, 10, &offset);
                        else
                                rv = kstrtoll(buf, 10, &offset);
                        if (rv)
-                               return rv;
-                       if (offset == 0)
-                               return -EINVAL;
+                               goto out;
+                       if (offset == 0) {
+                               rv = -EINVAL;
+                               goto out;
+                       }
                        if (mddev->bitmap_info.external == 0 &&
                            mddev->major_version == 0 &&
-                           offset != mddev->bitmap_info.default_offset)
-                               return -EINVAL;
+                           offset != mddev->bitmap_info.default_offset) {
+                               rv = -EINVAL;
+                               goto out;
+                       }
                        mddev->bitmap_info.offset = offset;
                        if (mddev->pers) {
                                struct bitmap *bitmap;
@@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                mddev->pers->quiesce(mddev, 0);
                                if (rv) {
                                        bitmap_destroy(mddev);
-                                       return rv;
+                                       goto out;
                                }
                        }
                }
@@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                set_bit(MD_CHANGE_DEVS, &mddev->flags);
                md_wakeup_thread(mddev->thread);
        }
+       rv = 0;
+out:
+       mddev_unlock(mddev);
+       if (rv)
+               return rv;
        return len;
 }
 
index 6571c81..8625040 100644
@@ -1879,7 +1879,7 @@ static int __init dm_bufio_init(void)
        __cache_size_refresh();
        mutex_unlock(&dm_bufio_clients_lock);
 
-       dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
+       dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
        if (!dm_bufio_wq)
                return -ENOMEM;
 
index 4e9784b..8742957 100644
@@ -181,7 +181,7 @@ struct crypt_config {
        u8 key[0];
 };
 
-#define MIN_IOS        16
+#define MIN_IOS        64
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -1453,7 +1453,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
        unsigned i;
        int err;
 
-       cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
+       cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
                           GFP_KERNEL);
        if (!cc->tfms)
                return -ENOMEM;
@@ -1924,6 +1924,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
                return DM_MAPIO_REMAPPED;
        }
 
+       /*
+        * Check if bio is too large, split as needed.
+        */
+       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+           bio_data_dir(bio) == WRITE)
+               dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
+
        io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
        io->ctx.req = (struct skcipher_request *)(io + 1);
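
A unit check on the split threshold above (assuming 4 KiB pages and the
historical BIO_MAX_PAGES of 256):

/*
 *   BIO_MAX_PAGES << PAGE_SHIFT                   = 256 * 4096 = 1 MiB (bytes)
 *   (BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT = 1 MiB / 512 = 2048 sectors
 *
 * dm_accept_partial_bio() takes the count in 512-byte sectors, so writes
 * larger than 1 MiB are processed in 1 MiB pieces.
 */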
index 97e446d..6a2e8dd 100644
@@ -289,15 +289,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
                pb->bio_submitted = true;
 
                /*
-                * Map reads as normal only if corrupt_bio_byte set.
+                * Error reads if neither corrupt_bio_byte nor drop_writes is set.
+                * Otherwise, flakey_end_io() will decide if the reads should be modified.
                 */
                if (bio_data_dir(bio) == READ) {
-                       /* If flags were specified, only corrupt those that match. */
-                       if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
-                           all_corrupt_bio_flags_match(bio, fc))
-                               goto map_bio;
-                       else
+                       if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
                                return -EIO;
+                       goto map_bio;
                }
 
                /*
@@ -334,14 +332,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
        struct flakey_c *fc = ti->private;
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-       /*
-        * Corrupt successful READs while in down state.
-        */
        if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
-               if (fc->corrupt_bio_byte)
+               if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
+                   all_corrupt_bio_flags_match(bio, fc)) {
+                       /*
+                        * Corrupt successful matching READs while in down state.
+                        */
                        corrupt_bio_data(bio, fc);
-               else
+
+               } else if (!test_bit(DROP_WRITES, &fc->flags)) {
+                       /*
+                        * Error reads during the down_interval if drop_writes
+                        * wasn't configured.
+                        */
                        return -EIO;
+               }
        }
 
        return error;
index 4ab6803..49e4d8d 100644
@@ -259,12 +259,12 @@ static int log_one_block(struct log_writes_c *lc,
                goto out;
        sector++;
 
-       bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
+       atomic_inc(&lc->io_blocks);
+       bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
        if (!bio) {
                DMERR("Couldn't alloc log bio");
                goto error;
        }
-       atomic_inc(&lc->io_blocks);
        bio->bi_iter.bi_size = 0;
        bio->bi_iter.bi_sector = sector;
        bio->bi_bdev = lc->logdev->bdev;
@@ -282,7 +282,7 @@ static int log_one_block(struct log_writes_c *lc,
                if (ret != block->vecs[i].bv_len) {
                        atomic_inc(&lc->io_blocks);
                        submit_bio(bio);
-                       bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i);
+                       bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
                        if (!bio) {
                                DMERR("Couldn't alloc log bio");
                                goto error;
@@ -459,9 +459,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad;
        }
 
-       ret = -EINVAL;
        lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
-       if (!lc->log_kthread) {
+       if (IS_ERR(lc->log_kthread)) {
+               ret = PTR_ERR(lc->log_kthread);
                ti->error = "Couldn't alloc kthread";
                dm_put_device(ti, lc->dev);
                dm_put_device(ti, lc->logdev);
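
The fix above relies on kthread_run() never returning NULL: on failure it
returns an ERR_PTR (for example -ENOMEM or -EINTR), so the old
!lc->log_kthread test could never fire. The correct pattern, sketched with a
placeholder worker function:

#include <linux/kthread.h>
#include <linux/err.h>

static int ex_start_worker(int (*worker_fn)(void *), void *data)
{
	struct task_struct *task = kthread_run(worker_fn, data, "ex-worker");

	if (IS_ERR(task))		/* kthread_run() never returns NULL */
		return PTR_ERR(task);
	return 0;
}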
index 4ca2d1d..07fc1ad 100644
@@ -291,9 +291,10 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis
        core->nr_regions = le64_to_cpu(disk->nr_regions);
 }
 
-static int rw_header(struct log_c *lc, int rw)
+static int rw_header(struct log_c *lc, int op)
 {
-       lc->io_req.bi_op = rw;
+       lc->io_req.bi_op = op;
+       lc->io_req.bi_op_flags = 0;
 
        return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -316,7 +317,7 @@ static int read_header(struct log_c *log)
 {
        int r;
 
-       r = rw_header(log, READ);
+       r = rw_header(log, REQ_OP_READ);
        if (r)
                return r;
 
@@ -630,7 +631,7 @@ static int disk_resume(struct dm_dirty_log *log)
        header_to_disk(&lc->header, lc->disk_header);
 
        /* write the new header */
-       r = rw_header(lc, WRITE);
+       r = rw_header(lc, REQ_OP_WRITE);
        if (!r) {
                r = flush_header(lc);
                if (r)
@@ -698,7 +699,7 @@ static int disk_flush(struct dm_dirty_log *log)
                        log_clear_bit(lc, lc->clean_bits, i);
        }
 
-       r = rw_header(lc, WRITE);
+       r = rw_header(lc, REQ_OP_WRITE);
        if (r)
                fail_log_device(lc);
        else {
index 1b9795d..8abde6b 100644
@@ -191,7 +191,6 @@ struct raid_dev {
 #define RT_FLAG_RS_BITMAP_LOADED       2
 #define RT_FLAG_UPDATE_SBS             3
 #define RT_FLAG_RESHAPE_RS             4
-#define RT_FLAG_KEEP_RS_FROZEN         5
 
 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 {
        unsigned long min_region_size = rs->ti->len / (1 << 21);
 
+       if (rs_is_raid0(rs))
+               return 0;
+
        if (!region_size) {
                /*
                 * Choose a reasonable default.  All figures in sectors.
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
                        rebuild_cnt++;
 
        switch (rs->raid_type->level) {
+       case 0:
+               break;
        case 1:
                if (rebuild_cnt >= rs->md.raid_disks)
                        goto too_many;
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
                case 0:
                        break;
                default:
+                       /*
+                        * We have to keep any raid0 data/metadata device pairs or
+                        * the MD raid0 personality will fail to start the array.
+                        */
+                       if (rs_is_raid0(rs))
+                               continue;
+
                        dev = container_of(rdev, struct raid_dev, rdev);
                        if (dev->meta_dev)
                                dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
                } else {
                        /* Process raid1 without delta_disks */
                        mddev->raid_disks = rs->raid_disks;
-                       set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                        reshape = false;
                }
        } else {
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
        if (reshape) {
                set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
        } else if (mddev->raid_disks < rs->raid_disks)
                /* Create new superblocks and bitmaps, if any new disks */
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                        goto bad;
 
                set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                /* Takeover ain't recovery, so disable recovery */
                rs_setup_recovery(rs, MaxSector);
                rs_set_new(rs);
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
-       if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
-               if (!rs->md.suspended)
-                       mddev_suspend(&rs->md);
-               rs->md.ro = 1;
-       }
+       if (!rs->md.suspended)
+               mddev_suspend(&rs->md);
+
+       rs->md.ro = 1;
 }
 
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 {
        int i;
-       uint64_t failed_devices, cleared_failed_devices = 0;
+       uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
        unsigned long flags;
+       bool cleared = false;
        struct dm_raid_superblock *sb;
+       struct mddev *mddev = &rs->md;
        struct md_rdev *r;
 
+       /* RAID personalities have to provide hot add/remove methods or we need to bail out. */
+       if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
+               return;
+
+       memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
        for (i = 0; i < rs->md.raid_disks; i++) {
                r = &rs->dev[i].rdev;
                if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                         * ourselves.
                         */
                        if ((r->raid_disk >= 0) &&
-                           (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+                           (mddev->pers->hot_remove_disk(mddev, r) != 0))
                                /* Failed to revive this device, try next */
                                continue;
 
@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                        clear_bit(Faulty, &r->flags);
                        clear_bit(WriteErrorSeen, &r->flags);
                        clear_bit(In_sync, &r->flags);
-                       if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+                       if (mddev->pers->hot_add_disk(mddev, r)) {
                                r->raid_disk = -1;
                                r->saved_raid_disk = -1;
                                r->flags = flags;
                        } else {
                                r->recovery_offset = 0;
-                               cleared_failed_devices |= 1 << i;
+                               set_bit(i, (void *) cleared_failed_devices);
+                               cleared = true;
                        }
                }
        }
-       if (cleared_failed_devices) {
+
+       /* If any failed devices could be cleared, update all superblocks' failed_devices bits */
+       if (cleared) {
+               uint64_t failed_devices[DISKS_ARRAY_ELEMS];
+
                rdev_for_each(r, &rs->md) {
                        sb = page_address(r->sb_page);
-                       failed_devices = le64_to_cpu(sb->failed_devices);
-                       failed_devices &= ~cleared_failed_devices;
-                       sb->failed_devices = cpu_to_le64(failed_devices);
+                       sb_retrieve_failed_devices(sb, failed_devices);
+
+                       for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
+                               failed_devices[i] &= ~cleared_failed_devices[i];
+
+                       sb_update_failed_devices(sb, failed_devices);
                }
        }
 }
@@ -3610,26 +3633,15 @@ static void raid_resume(struct dm_target *ti)
                 * devices are reachable again.
                 */
                attempt_restore_of_faulty_devices(rs);
-       } else {
-               mddev->ro = 0;
-               mddev->in_sync = 0;
+       }
 
-               /*
-                * When passing in flags to the ctr, we expect userspace
-                * to reset them because they made it to the superblocks
-                * and reload the mapping anyway.
-                *
-                * -> only unfreeze recovery in case of a table reload or
-                *    we'll have a bogus recovery/reshape position
-                *    retrieved from the superblock by the ctr because
-                *    the ongoing recovery/reshape will change it after read.
-                */
-               if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
-                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       mddev->ro = 0;
+       mddev->in_sync = 0;
 
-               if (mddev->suspended)
-                       mddev_resume(mddev);
-       }
+       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+       if (mddev->suspended)
+               mddev_resume(mddev);
 }
 
 static struct target_type raid_target = {
index 4ace1da..6c25213 100644
@@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
        struct path_info *pi = NULL;
        struct dm_path *current_path = NULL;
 
+       local_irq_save(flags);
        current_path = *this_cpu_ptr(s->current_path);
        if (current_path) {
                percpu_counter_dec(&s->repeat_count);
-               if (percpu_counter_read_positive(&s->repeat_count) > 0)
+               if (percpu_counter_read_positive(&s->repeat_count) > 0) {
+                       local_irq_restore(flags);
                        return current_path;
+               }
        }
 
-       spin_lock_irqsave(&s->lock, flags);
+       spin_lock(&s->lock);
        if (!list_empty(&s->valid_paths)) {
                pi = list_entry(s->valid_paths.next, struct path_info, list);
                list_move_tail(&pi->list, &s->valid_paths);
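
The reshuffle above keeps interrupts disabled across both the lockless
per-CPU fast path and the locked slow path, instead of only around the
spinlock. The general shape, as a sketch with ex_* placeholders:

	unsigned long flags;

	local_irq_save(flags);		/* covers the per-CPU fast path too */
	if (ex_fast_path_hit()) {	/* hypothetical lockless check */
		local_irq_restore(flags);
		return;
	}
	spin_lock(&ex_lock);		/* irqs already off: plain spin_lock */
	/* ... slow path ... */
	spin_unlock_irqrestore(&ex_lock, flags);	/* unlock + restore irqs */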
index 41573f1..34a840d 100644
@@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes)
                goto err;
        }
        cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
-       if (!cinfo->ack_lockres)
+       if (!cinfo->ack_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
        /* get sync CR lock on ACK. */
        if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
                pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
@@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes)
        pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
        snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
        cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
-       if (!cinfo->bitmap_lockres)
+       if (!cinfo->bitmap_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
        if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
                pr_err("Failed to get bitmap lock\n");
                ret = -EINVAL;
@@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes)
        }
 
        cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
-       if (!cinfo->resync_lockres)
+       if (!cinfo->resync_lockres) {
+               ret = -ENOMEM;
                goto err;
+       }
 
        return 0;
 err:
index d646f6e..67642ba 100644
@@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
                        mddev->new_chunk_sectors = mddev->chunk_sectors;
                }
 
-               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+               if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
                        set_bit(MD_HAS_JOURNAL, &mddev->flags);
-                       if (mddev->recovery_cp == MaxSector)
-                               set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
-               }
        } else if (mddev->pers == NULL) {
                /* Insist on a good event counter while assembling, except for
                 * spares (which don't need an event count) */
@@ -5851,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
                        working++;
                        if (test_bit(In_sync, &rdev->flags))
                                insync++;
+                       else if (test_bit(Journal, &rdev->flags))
+                               /* TODO: add journal count to md_u.h */
+                               ;
                        else
                                spare++;
                }
@@ -7862,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
         */
 
        do {
+               int mddev2_minor = -1;
                mddev->curr_resync = 2;
 
        try_again:
@@ -7891,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
                                prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
                                if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
-                                       printk(KERN_INFO "md: delaying %s of %s"
-                                              " until %s has finished (they"
-                                              " share one or more physical units)\n",
-                                              desc, mdname(mddev), mdname(mddev2));
+                                       if (mddev2_minor != mddev2->md_minor) {
+                                               mddev2_minor = mddev2->md_minor;
+                                               printk(KERN_INFO "md: delaying %s of %s"
+                                                      " until %s has finished (they"
+                                                      " share one or more physical units)\n",
+                                                      desc, mdname(mddev),
+                                                      mdname(mddev2));
+                                       }
                                        mddev_put(mddev2);
                                        if (signal_pending(current))
                                                flush_signals(current);
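The md_do_sync() hunk above is a log-flood fix: the retry loop can encounter the same conflicting array on every pass, so the "delaying" message is now emitted only when the conflicting device's minor number changes. A self-contained sketch of the remember-last-reported pattern (conflicting_id() is hypothetical):

        #include <stdio.h>

        /* Hypothetical: id of a conflicting peer, or -1 when clear to go. */
        static int conflicting_id(int attempt) { return attempt < 3 ? 7 : -1; }

        static void wait_for_peers(void)
        {
                int last_reported = -1; /* sentinel: nothing reported yet */
                int peer, attempt = 0;

                while ((peer = conflicting_id(attempt++)) >= 0) {
                        if (peer != last_reported) {    /* once per peer */
                                last_reported = peer;
                                printf("delaying: waiting for %d\n", peer);
                        }
                        /* ... wait and retry ... */
                }
        }

        int main(void) { wait_for_peers(); return 0; }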
@@ -8275,16 +8280,13 @@ no_add:
 static void md_start_sync(struct work_struct *ws)
 {
        struct mddev *mddev = container_of(ws, struct mddev, del_work);
-       int ret = 0;
 
        mddev->sync_thread = md_register_thread(md_do_sync,
                                                mddev,
                                                "resync");
        if (!mddev->sync_thread) {
-               if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
-                       printk(KERN_ERR "%s: could not start resync"
-                              " thread...\n",
-                              mdname(mddev));
+               printk(KERN_ERR "%s: could not start resync thread...\n",
+                      mdname(mddev));
                /* leave the spares where they are, it shouldn't hurt */
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
index 0e4efcd..be1a9fc 100644
@@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
        int max_sectors;
        int sectors;
 
+       md_write_start(mddev, bio);
+
        /*
         * Register the new request and wait if the reconstruction
         * thread has put up a bar for new requests.
@@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
                return;
        }
 
-       md_write_start(mddev, bio);
-
        do {
 
                /*
@@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
 
        while (sect_to_write) {
                struct bio *wbio;
+               sector_t wsector;
                if (sectors > sect_to_write)
                        sectors = sect_to_write;
                /* Write at 'sector' for 'sectors' */
                wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
                bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
-               wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-                                  choose_data_offset(r10_bio, rdev) +
-                                  (sector - r10_bio->sector));
+               wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
+               wbio->bi_iter.bi_sector = wsector +
+                                  choose_data_offset(r10_bio, rdev);
                wbio->bi_bdev = rdev->bdev;
                bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
 
                if (submit_bio_wait(wbio) < 0)
                        /* Failure! */
-                       ok = rdev_set_badblocks(rdev, sector,
+                       ok = rdev_set_badblocks(rdev, wsector,
                                                sectors, 0)
                                && ok;
 
index 8912407..da583bb 100644
@@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
+       int inc_empty_inactive_list_flag;
 
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
                                       !test_bit(STRIPE_EXPANDING, &sh->state));
+                               inc_empty_inactive_list_flag = 0;
+                               if (!list_empty(conf->inactive_list + hash))
+                                       inc_empty_inactive_list_flag = 1;
                                list_del_init(&sh->lru);
+                               if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                                       atomic_inc(&conf->empty_inactive_list_nr);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
@@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
        sector_t head_sector, tmp_sec;
        int hash;
        int dd_idx;
+       int inc_empty_inactive_list_flag;
 
        /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
        tmp_sec = sh->sector;
@@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
                                atomic_inc(&conf->active_stripes);
                        BUG_ON(list_empty(&head->lru) &&
                               !test_bit(STRIPE_EXPANDING, &head->state));
+                       inc_empty_inactive_list_flag = 0;
+                       if (!list_empty(conf->inactive_list + hash))
+                               inc_empty_inactive_list_flag = 1;
                        list_del_init(&head->lru);
+                       if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+                               atomic_inc(&conf->empty_inactive_list_nr);
                        if (head->group) {
                                head->group->stripes_cnt--;
                                head->group = NULL;
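The two inc_empty_inactive_list_flag hunks above keep conf->empty_inactive_list_nr accurate: the counter may only grow when this particular removal is what emptied the per-hash inactive list, so list_empty() is sampled before list_del_init() and checked again afterwards. A self-contained sketch of that before/after sampling (list and counter are stand-ins):

        #include <stdio.h>

        static int list_len = 1;        /* stand-in for one hashed inactive list */
        static int empty_lists;         /* stand-in for empty_inactive_list_nr */

        static void remove_one(void)
        {
                int was_nonempty = (list_len > 0);      /* sample before */

                if (list_len > 0)
                        list_len--;                     /* the list_del_init() */

                if (list_len == 0 && was_nonempty)      /* we emptied it */
                        empty_lists++;
        }

        int main(void)
        {
                remove_one();
                remove_one();   /* already empty: must not count again */
                printf("empty_lists = %d\n", empty_lists);      /* 1 */
                return 0;
        }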
@@ -993,7 +1005,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
                        bio_set_op_attrs(bi, op, op_flags);
                        bi->bi_end_io = op_is_write(op)
@@ -1045,7 +1056,6 @@ again:
 
                        set_bit(STRIPE_IO_STARTED, &sh->state);
 
-                       bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
                        bio_set_op_attrs(rbi, op, op_flags);
                        BUG_ON(!op_is_write(op));
@@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
-static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+       int disks)
 {
        struct stripe_head *sh;
+       int i;
 
        sh = kmem_cache_zalloc(sc, gfp);
        if (sh) {
@@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
                INIT_LIST_HEAD(&sh->batch_list);
                INIT_LIST_HEAD(&sh->lru);
                atomic_set(&sh->count, 1);
+               for (i = 0; i < disks; i++) {
+                       struct r5dev *dev = &sh->dev[i];
+
+                       bio_init(&dev->req);
+                       dev->req.bi_io_vec = &dev->vec;
+                       dev->req.bi_max_vecs = 1;
+
+                       bio_init(&dev->rreq);
+                       dev->rreq.bi_io_vec = &dev->rvec;
+                       dev->rreq.bi_max_vecs = 1;
+               }
        }
        return sh;
 }
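This alloc_stripe() change, together with the bio_reset() moves elsewhere in the file, restructures the embedded per-device bios: each is bio_init()ed exactly once when the stripe_head is allocated, wired to its single embedded bio_vec, and then recycled with bio_reset() in the end_io handlers after completion instead of being reset just before resubmission. A condensed kernel-context sketch of that life cycle (not a standalone program; assumes the 4.8-era one-argument bio_init()):

        /* at allocation: one-time setup of the embedded bio */
        bio_init(&dev->req);
        dev->req.bi_io_vec   = &dev->vec;       /* single embedded segment */
        dev->req.bi_max_vecs = 1;

        /* at submission: fill per-I/O fields (bi_bdev, bi_sector, bi_end_io) */

        /* at completion, inside the end_io handler: */
        bio_reset(&dev->req);   /* clean slate for the next reuse */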
@@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
        struct stripe_head *sh;
 
-       sh = alloc_stripe(conf->slab_cache, gfp);
+       sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
        if (!sh)
                return 0;
 
@@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        mutex_lock(&conf->cache_size_mutex);
 
        for (i = conf->max_nr_stripes; i; i--) {
-               nsh = alloc_stripe(sc, GFP_KERNEL);
+               nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
                if (!nsh)
                        break;
 
@@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
+       bio_reset(bi);
 }
 
 static void raid5_end_write_request(struct bio *bi)
@@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
+               bio_reset(bi);
                BUG();
                return;
        }
@@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
 
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
+       bio_reset(bi);
 }
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
 {
        struct r5dev *dev = &sh->dev[i];
 
-       bio_init(&dev->req);
-       dev->req.bi_io_vec = &dev->vec;
-       dev->req.bi_max_vecs = 1;
-       dev->req.bi_private = sh;
-
-       bio_init(&dev->rreq);
-       dev->rreq.bi_io_vec = &dev->rvec;
-       dev->rreq.bi_max_vecs = 1;
-       dev->rreq.bi_private = sh;
-
        dev->flags = 0;
        dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
@@ -4628,7 +4645,9 @@ finish:
        }
 
        if (!bio_list_empty(&s.return_bi)) {
-               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+               if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+                               (s.failed <= conf->max_degraded ||
+                                       conf->mddev->external == 0)) {
                        spin_lock_irq(&conf->device_lock);
                        bio_list_merge(&conf->return_bi, &s.return_bi);
                        spin_unlock_irq(&conf->device_lock);
@@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
        if (IS_ERR(conf))
                return PTR_ERR(conf);
 
-       if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
-               printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
-                      mdname(mddev));
-               mddev->ro = 1;
-               set_disk_ro(mddev->gendisk, 1);
+       if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+               if (!journal_dev) {
+                       pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+                              mdname(mddev));
+                       mddev->ro = 1;
+                       set_disk_ro(mddev->gendisk, 1);
+               } else if (mddev->recovery_cp == MaxSector)
+                       set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
        }
 
        conf->min_offset_diff = min_offset_diff;
index 869c83f..f00f3e7 100644
@@ -2185,7 +2185,7 @@ static int gpmc_probe_dt(struct platform_device *pdev)
        return 0;
 }
 
-static int gpmc_probe_dt_children(struct platform_device *pdev)
+static void gpmc_probe_dt_children(struct platform_device *pdev)
 {
        int ret;
        struct device_node *child;
@@ -2200,11 +2200,11 @@ static int gpmc_probe_dt_children(struct platform_device *pdev)
                else
                        ret = gpmc_probe_generic_child(pdev, child);
 
-               if (ret)
-                       return ret;
+               if (ret) {
+                       dev_err(&pdev->dev, "failed to probe DT child '%s': %d\n",
+                               child->name, ret);
+               }
        }
-
-       return 0;
 }
 #else
 static int gpmc_probe_dt(struct platform_device *pdev)
@@ -2212,9 +2212,8 @@ static int gpmc_probe_dt(struct platform_device *pdev)
        return 0;
 }
 
-static int gpmc_probe_dt_children(struct platform_device *pdev)
+static void gpmc_probe_dt_children(struct platform_device *pdev)
 {
-       return 0;
 }
 #endif /* CONFIG_OF */
 
@@ -2369,16 +2368,10 @@ static int gpmc_probe(struct platform_device *pdev)
                goto setup_irq_failed;
        }
 
-       rc = gpmc_probe_dt_children(pdev);
-       if (rc < 0) {
-               dev_err(gpmc->dev, "failed to probe DT children\n");
-               goto dt_children_failed;
-       }
+       gpmc_probe_dt_children(pdev);
 
        return 0;
 
-dt_children_failed:
-       gpmc_free_irq(gpmc);
 setup_irq_failed:
        gpmc_gpio_exit(gpmc);
 gpio_init_failed:
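The gpmc hunks above turn DT child probing from fail-fast into best-effort: one misconfigured child node no longer unwinds the whole controller probe; the failure is logged per child and the loop moves on, which is why gpmc_probe_dt_children() can become void. A self-contained sketch of the best-effort shape (probe_child() is hypothetical):

        #include <stdio.h>

        static int probe_child(int i) { return (i == 2) ? -19 : 0; /* -ENODEV */ }

        /* Best-effort: log and continue; the caller has nothing to unwind. */
        static void probe_children(int n)
        {
                for (int i = 0; i < n; i++) {
                        int ret = probe_child(i);
                        if (ret)
                                fprintf(stderr,
                                        "child %d failed: %d (continuing)\n",
                                        i, ret);
                }
        }

        int main(void) { probe_children(4); return 0; }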
index a216b46..d002528 100644
@@ -345,16 +345,6 @@ config SENSORS_TSL2550
          This driver can also be built as a module.  If so, the module
          will be called tsl2550.
 
-config SENSORS_BH1780
-       tristate "ROHM BH1780GLI ambient light sensor"
-       depends on I2C && SYSFS
-       help
-         If you say yes here you get support for the ROHM BH1780GLI
-         ambient light sensor.
-
-         This driver can also be built as a module.  If so, the module
-         will be called bh1780gli.
-
 config SENSORS_BH1770
          tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
          depends on I2C
index 7410c6d..fb32516 100644
@@ -19,7 +19,6 @@ obj-$(CONFIG_TIFM_CORE)               += tifm_core.o
 obj-$(CONFIG_TIFM_7XX1)        += tifm_7xx1.o
 obj-$(CONFIG_PHANTOM)          += phantom.o
 obj-$(CONFIG_QCOM_COINCELL)    += qcom-coincell.o
-obj-$(CONFIG_SENSORS_BH1780)   += bh1780gli.o
 obj-$(CONFIG_SENSORS_BH1770)   += bh1770glc.o
 obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o
 obj-$(CONFIG_SGI_IOC4)         += ioc4.o
diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c
deleted file mode 100644
index 7f90ce5..0000000
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * bh1780gli.c
- * ROHM Ambient Light Sensor Driver
- *
- * Copyright (C) 2010 Texas Instruments
- * Author: Hemanth V <hemanthv@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#include <linux/i2c.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/of.h>
-
-#define BH1780_REG_CONTROL     0x80
-#define BH1780_REG_PARTID      0x8A
-#define BH1780_REG_MANFID      0x8B
-#define BH1780_REG_DLOW        0x8C
-#define BH1780_REG_DHIGH       0x8D
-
-#define BH1780_REVMASK         (0xf)
-#define BH1780_POWMASK         (0x3)
-#define BH1780_POFF            (0x0)
-#define BH1780_PON             (0x3)
-
-/* power on settling time in ms */
-#define BH1780_PON_DELAY       2
-
-struct bh1780_data {
-       struct i2c_client *client;
-       int power_state;
-       /* lock for sysfs operations */
-       struct mutex lock;
-};
-
-static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg)
-{
-       int ret = i2c_smbus_write_byte_data(ddata->client, reg, val);
-       if (ret < 0)
-               dev_err(&ddata->client->dev,
-                       "i2c_smbus_write_byte_data failed error %d Register (%s)\n",
-                       ret, msg);
-       return ret;
-}
-
-static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg)
-{
-       int ret = i2c_smbus_read_byte_data(ddata->client, reg);
-       if (ret < 0)
-               dev_err(&ddata->client->dev,
-                       "i2c_smbus_read_byte_data failed error %d Register (%s)\n",
-                       ret, msg);
-       return ret;
-}
-
-static ssize_t bh1780_show_lux(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct bh1780_data *ddata = platform_get_drvdata(pdev);
-       int lsb, msb;
-
-       lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW");
-       if (lsb < 0)
-               return lsb;
-
-       msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH");
-       if (msb < 0)
-               return msb;
-
-       return sprintf(buf, "%d\n", (msb << 8) | lsb);
-}
-
-static ssize_t bh1780_show_power_state(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct bh1780_data *ddata = platform_get_drvdata(pdev);
-       int state;
-
-       state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
-       if (state < 0)
-               return state;
-
-       return sprintf(buf, "%d\n", state & BH1780_POWMASK);
-}
-
-static ssize_t bh1780_store_power_state(struct device *dev,
-                                       struct device_attribute *attr,
-                                       const char *buf, size_t count)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct bh1780_data *ddata = platform_get_drvdata(pdev);
-       unsigned long val;
-       int error;
-
-       error = kstrtoul(buf, 0, &val);
-       if (error)
-               return error;
-
-       if (val < BH1780_POFF || val > BH1780_PON)
-               return -EINVAL;
-
-       mutex_lock(&ddata->lock);
-
-       error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL");
-       if (error < 0) {
-               mutex_unlock(&ddata->lock);
-               return error;
-       }
-
-       msleep(BH1780_PON_DELAY);
-       ddata->power_state = val;
-       mutex_unlock(&ddata->lock);
-
-       return count;
-}
-
-static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL);
-
-static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
-               bh1780_show_power_state, bh1780_store_power_state);
-
-static struct attribute *bh1780_attributes[] = {
-       &dev_attr_power_state.attr,
-       &dev_attr_lux.attr,
-       NULL
-};
-
-static const struct attribute_group bh1780_attr_group = {
-       .attrs = bh1780_attributes,
-};
-
-static int bh1780_probe(struct i2c_client *client,
-                                               const struct i2c_device_id *id)
-{
-       int ret;
-       struct bh1780_data *ddata;
-       struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
-
-       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
-               return -EIO;
-
-       ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data),
-                            GFP_KERNEL);
-       if (ddata == NULL)
-               return -ENOMEM;
-
-       ddata->client = client;
-       i2c_set_clientdata(client, ddata);
-
-       ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID");
-       if (ret < 0)
-               return ret;
-
-       dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n",
-                       (ret & BH1780_REVMASK));
-
-       mutex_init(&ddata->lock);
-
-       return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group);
-}
-
-static int bh1780_remove(struct i2c_client *client)
-{
-       sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int bh1780_suspend(struct device *dev)
-{
-       struct bh1780_data *ddata;
-       int state, ret;
-       struct i2c_client *client = to_i2c_client(dev);
-
-       ddata = i2c_get_clientdata(client);
-       state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
-       if (state < 0)
-               return state;
-
-       ddata->power_state = state & BH1780_POWMASK;
-
-       ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF,
-                               "CONTROL");
-
-       if (ret < 0)
-               return ret;
-
-       return 0;
-}
-
-static int bh1780_resume(struct device *dev)
-{
-       struct bh1780_data *ddata;
-       int state, ret;
-       struct i2c_client *client = to_i2c_client(dev);
-
-       ddata = i2c_get_clientdata(client);
-       state = ddata->power_state;
-       ret = bh1780_write(ddata, BH1780_REG_CONTROL, state,
-                               "CONTROL");
-
-       if (ret < 0)
-               return ret;
-
-       return 0;
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume);
-
-static const struct i2c_device_id bh1780_id[] = {
-       { "bh1780", 0 },
-       { },
-};
-
-MODULE_DEVICE_TABLE(i2c, bh1780_id);
-
-#ifdef CONFIG_OF
-static const struct of_device_id of_bh1780_match[] = {
-       { .compatible = "rohm,bh1780gli", },
-       {},
-};
-
-MODULE_DEVICE_TABLE(of, of_bh1780_match);
-#endif
-
-static struct i2c_driver bh1780_driver = {
-       .probe          = bh1780_probe,
-       .remove         = bh1780_remove,
-       .id_table       = bh1780_id,
-       .driver = {
-               .name = "bh1780",
-               .pm     = &bh1780_pm,
-               .of_match_table = of_match_ptr(of_bh1780_match),
-       },
-};
-
-module_i2c_driver(bh1780_driver);
-
-MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>");
index 7ada5f1..3519ace 100644
@@ -230,6 +230,11 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
        if (phb->bus == NULL)
                return -ENXIO;
 
+       /* Set release hook on root bus */
+       pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge),
+                                   pcibios_free_controller_deferred,
+                                   (void *) phb);
+
        /* Claim resources. This might need some rework as well, depending
         * on whether we are doing probe-only or not, like assigning unassigned
         * resources etc...
@@ -256,7 +261,10 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu)
        afu->phb = NULL;
 
        pci_remove_root_bus(phb->bus);
-       pcibios_free_controller(phb);
+       /*
+        * We don't free phb here - that's handled by
+        * pcibios_free_controller_deferred()
+        */
 }
 
 static bool _cxl_pci_is_vphb_device(struct pci_controller *phb)
index 166b1db..3564477 100644
@@ -4,7 +4,7 @@
  */
 #include "lkdtm.h"
 
-void lkdtm_rodata_do_nothing(void)
+void notrace lkdtm_rodata_do_nothing(void)
 {
        /* Does nothing. We just want an architecture agnostic "return". */
 }
index 5525a20..1dd6114 100644
@@ -9,7 +9,15 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
-static size_t cache_size = 1024;
+/*
+ * Many of the tests here end up using const sizes, but those would
+ * normally be ignored by hardened usercopy, so force the compiler
+ * into choosing the non-const path to make sure we trigger the
+ * hardened usercopy checks by adding "unconst" to all the const copies,
+ * and making sure "cache_size" isn't optimized into a const.
+ */
+static volatile size_t unconst = 0;
+static volatile size_t cache_size = 1024;
 static struct kmem_cache *bad_cache;
 
 static const unsigned char test_text[] = "This is a test.\n";
@@ -67,14 +75,14 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
        if (to_user) {
                pr_info("attempting good copy_to_user of local stack\n");
                if (copy_to_user((void __user *)user_addr, good_stack,
-                                sizeof(good_stack))) {
+                                unconst + sizeof(good_stack))) {
                        pr_warn("copy_to_user failed unexpectedly?!\n");
                        goto free_user;
                }
 
                pr_info("attempting bad copy_to_user of distant stack\n");
                if (copy_to_user((void __user *)user_addr, bad_stack,
-                                sizeof(good_stack))) {
+                                unconst + sizeof(good_stack))) {
                        pr_warn("copy_to_user failed, but lacked Oops\n");
                        goto free_user;
                }
@@ -88,14 +96,14 @@ static noinline void do_usercopy_stack(bool to_user, bool bad_frame)
 
                pr_info("attempting good copy_from_user of local stack\n");
                if (copy_from_user(good_stack, (void __user *)user_addr,
-                                  sizeof(good_stack))) {
+                                  unconst + sizeof(good_stack))) {
                        pr_warn("copy_from_user failed unexpectedly?!\n");
                        goto free_user;
                }
 
                pr_info("attempting bad copy_from_user of distant stack\n");
                if (copy_from_user(bad_stack, (void __user *)user_addr,
-                                  sizeof(good_stack))) {
+                                  unconst + sizeof(good_stack))) {
                        pr_warn("copy_from_user failed, but lacked Oops\n");
                        goto free_user;
                }
@@ -109,7 +117,7 @@ static void do_usercopy_heap_size(bool to_user)
 {
        unsigned long user_addr;
        unsigned char *one, *two;
-       const size_t size = 1024;
+       size_t size = unconst + 1024;
 
        one = kmalloc(size, GFP_KERNEL);
        two = kmalloc(size, GFP_KERNEL);
@@ -285,13 +293,14 @@ void lkdtm_USERCOPY_KERNEL(void)
 
        pr_info("attempting good copy_to_user from kernel rodata\n");
        if (copy_to_user((void __user *)user_addr, test_text,
-                        sizeof(test_text))) {
+                        unconst + sizeof(test_text))) {
                pr_warn("copy_to_user failed unexpectedly?!\n");
                goto free_user;
        }
 
        pr_info("attempting bad copy_to_user from kernel text\n");
-       if (copy_to_user((void __user *)user_addr, vm_mmap, PAGE_SIZE)) {
+       if (copy_to_user((void __user *)user_addr, vm_mmap,
+                        unconst + PAGE_SIZE)) {
                pr_warn("copy_to_user failed, but lacked Oops\n");
                goto free_user;
        }
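The "unconst" additions above matter because hardened usercopy skips its runtime checks when the copy length is a compile-time constant; adding a volatile zero to every size forces the length to be a runtime value and steers the tests onto the checked path. A self-contained illustration of the principle (uses the GCC/Clang __builtin_constant_p() extension; nothing here is kernel API):

        #include <stdio.h>
        #include <stddef.h>

        static volatile size_t unconst = 0;     /* value 0, but never a
                                                 * constant expression */

        int main(void)
        {
                char buf[16];
                size_t plain  = sizeof(buf);            /* constant: 16 */
                size_t fuzzed = unconst + sizeof(buf);  /* runtime: also 16 */

                printf("const? plain=%d fuzzed=%d\n",
                       __builtin_constant_p(sizeof(buf)),
                       __builtin_constant_p(unconst + sizeof(buf)));
                printf("plain=%zu fuzzed=%zu\n", plain, fuzzed);
                return 0;
        }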
index e2fb44c..dc3a854 100644
@@ -1263,8 +1263,14 @@ static bool mei_me_fw_type_nm(struct pci_dev *pdev)
 static bool mei_me_fw_type_sps(struct pci_dev *pdev)
 {
        u32 reg;
-       /* Read ME FW Status check for SPS Firmware */
-       pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+       unsigned int devfn;
+
+       /*
+        * Read ME FW Status register to check for SPS Firmware
+        * The SPS FW is only signaled in pci function 0
+        */
+       devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+       pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, &reg);
        trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
        /* if bits [19:16] = 15, running SPS Firmware */
        return (reg & 0xf0000) == 0xf0000;
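The fix above reads PCI_CFG_HFS_1 through function 0 of the device's own slot, since SPS firmware is only signalled there. devfn packs the slot and function numbers into a single byte; a worked example using the standard encoding macros (the concrete values are illustrative):

        #include <stdio.h>

        #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
        #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))

        int main(void)
        {
                unsigned int devfn = PCI_DEVFN(22, 3);  /* slot 22, function 3 */
                unsigned int fn0   = PCI_DEVFN(PCI_SLOT(devfn), 0);

                printf("devfn=0x%02x -> same slot, function 0: 0x%02x\n",
                       devfn, fn0);     /* 0xb3 -> 0xb0 */
                return 0;
        }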
index 64e64da..71cea9b 100644
@@ -85,8 +85,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)},
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)},
index 48a5dd7..2206d44 100644
@@ -1726,6 +1726,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req)
                        break;
 
                if (req_op(next) == REQ_OP_DISCARD ||
+                   req_op(next) == REQ_OP_SECURE_ERASE ||
                    req_op(next) == REQ_OP_FLUSH)
                        break;
 
@@ -2150,6 +2151,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        struct mmc_card *card = md->queue.card;
        struct mmc_host *host = card->host;
        unsigned long flags;
+       bool req_is_special = mmc_req_is_special(req);
 
        if (req && !mq->mqrq_prev->req)
                /* claim host only for the first request */
@@ -2190,8 +2192,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        }
 
 out:
-       if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) ||
-           mmc_req_is_special(req))
+       if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special)
                /*
                 * Release host when there are no more requests
                 * and after a special request (discard, flush) is done.
index bf14642..7080572 100644
@@ -33,7 +33,8 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
        /*
         * We only like normal block requests and discards.
         */
-       if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD) {
+       if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD &&
+           req_op(req) != REQ_OP_SECURE_ERASE) {
                blk_dump_rq_flags(req, "MMC bad request");
                return BLKPREP_KILL;
        }
@@ -64,6 +65,8 @@ static int mmc_queue_thread(void *d)
                spin_unlock_irq(q->queue_lock);
 
                if (req || mq->mqrq_prev->req) {
+                       bool req_is_special = mmc_req_is_special(req);
+
                        set_current_state(TASK_RUNNING);
                        mq->issue_fn(mq, req);
                        cond_resched();
@@ -79,7 +82,7 @@ static int mmc_queue_thread(void *d)
                         * has been finished. Do not assign it to previous
                         * request.
                         */
-                       if (mmc_req_is_special(req))
+                       if (req_is_special)
                                mq->mqrq_cur->req = NULL;
 
                        mq->mqrq_prev->brq.mrq.data = NULL;
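Both req_is_special hunks fix the same use-after-free shape: issue_fn()/mmc_blk_issue_rq() may complete and free the request, so mmc_req_is_special() has to be evaluated before the request is handed off, never on the far side of that call. A self-contained sketch (all names hypothetical):

        #include <stdbool.h>
        #include <stdlib.h>

        struct request { int op; };

        static bool is_special(const struct request *r) { return r && r->op; }
        static void issue(struct request *r) { free(r); }  /* may free r */

        static void thread_loop(struct request *req)
        {
                bool special = is_special(req); /* snapshot before handoff */

                issue(req);                     /* req may be gone now */
                if (special) {                  /* safe: req not dereferenced */
                        /* ... drop the cached current request ... */
                }
        }

        int main(void)
        {
                struct request *r = malloc(sizeof(*r));
                if (r) { r->op = 1; thread_loop(r); }
                return 0;
        }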
index d625311..fee5e12 100644
@@ -4,7 +4,9 @@
 static inline bool mmc_req_is_special(struct request *req)
 {
        return req &&
-               (req_op(req) == REQ_OP_FLUSH || req_op(req) == REQ_OP_DISCARD);
+               (req_op(req) == REQ_OP_FLUSH ||
+                req_op(req) == REQ_OP_DISCARD ||
+                req_op(req) == REQ_OP_SECURE_ERASE);
 }
 
 struct request;
index 217e8da..3f31ca3 100644
@@ -1341,9 +1341,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                            slave_dev->name);
        }
 
-       /* already enslaved */
-       if (slave_dev->flags & IFF_SLAVE) {
-               netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
+       /* already in-use? */
+       if (netdev_is_rx_handler_busy(slave_dev)) {
+               netdev_err(bond_dev,
+                          "Error: Device is in use and cannot be enslaved\n");
                return -EBUSY;
        }
 
@@ -4627,7 +4628,7 @@ static int bond_init(struct net_device *bond_dev)
 
        netdev_dbg(bond_dev, "Begin bond_init\n");
 
-       bond->wq = create_singlethread_workqueue(bond_dev->name);
+       bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
        if (!bond->wq)
                return -ENOMEM;
 
index 8f45443..0659846 100644
@@ -16,6 +16,7 @@ config NET_DSA_BCM_SF2
        select FIXED_PHY
        select BCM7XXX_PHY
        select MDIO_BCM_UNIMAC
+       select B53
        ---help---
          This enables support for the Broadcom Starfighter 2 Ethernet
          switch chips.
@@ -24,4 +25,13 @@ source "drivers/net/dsa/b53/Kconfig"
 
 source "drivers/net/dsa/mv88e6xxx/Kconfig"
 
+config NET_DSA_QCA8K
+       tristate "Qualcomm Atheros QCA8K Ethernet switch family support"
+       depends on NET_DSA
+       select NET_DSA_TAG_QCA
+       select REGMAP
+       ---help---
+         This enables support for the Qualcomm Atheros QCA8K Ethernet
+         switch chips.
+
 endmenu
index ca1e71b..8346e4f 100644
@@ -1,5 +1,6 @@
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_BCM_SF2)  += bcm_sf2.o
+obj-$(CONFIG_NET_DSA_QCA8K)    += qca8k.o
 
 obj-y                          += b53/
 obj-y                          += mv88e6xxx/
index 38ee10d..0afc2e5 100644
@@ -167,6 +167,65 @@ static const struct b53_mib_desc b53_mibs[] = {
 
 #define B53_MIBS_SIZE  ARRAY_SIZE(b53_mibs)
 
+static const struct b53_mib_desc b53_mibs_58xx[] = {
+       { 8, 0x00, "TxOctets" },
+       { 4, 0x08, "TxDropPkts" },
+       { 4, 0x0c, "TxQPKTQ0" },
+       { 4, 0x10, "TxBroadcastPkts" },
+       { 4, 0x14, "TxMulticastPkts" },
+       { 4, 0x18, "TxUnicastPKts" },
+       { 4, 0x1c, "TxCollisions" },
+       { 4, 0x20, "TxSingleCollision" },
+       { 4, 0x24, "TxMultipleCollision" },
+       { 4, 0x28, "TxDeferredCollision" },
+       { 4, 0x2c, "TxLateCollision" },
+       { 4, 0x30, "TxExcessiveCollision" },
+       { 4, 0x34, "TxFrameInDisc" },
+       { 4, 0x38, "TxPausePkts" },
+       { 4, 0x3c, "TxQPKTQ1" },
+       { 4, 0x40, "TxQPKTQ2" },
+       { 4, 0x44, "TxQPKTQ3" },
+       { 4, 0x48, "TxQPKTQ4" },
+       { 4, 0x4c, "TxQPKTQ5" },
+       { 8, 0x50, "RxOctets" },
+       { 4, 0x58, "RxUndersizePkts" },
+       { 4, 0x5c, "RxPausePkts" },
+       { 4, 0x60, "RxPkts64Octets" },
+       { 4, 0x64, "RxPkts65to127Octets" },
+       { 4, 0x68, "RxPkts128to255Octets" },
+       { 4, 0x6c, "RxPkts256to511Octets" },
+       { 4, 0x70, "RxPkts512to1023Octets" },
+       { 4, 0x74, "RxPkts1024toMaxPktsOctets" },
+       { 4, 0x78, "RxOversizePkts" },
+       { 4, 0x7c, "RxJabbers" },
+       { 4, 0x80, "RxAlignmentErrors" },
+       { 4, 0x84, "RxFCSErrors" },
+       { 8, 0x88, "RxGoodOctets" },
+       { 4, 0x90, "RxDropPkts" },
+       { 4, 0x94, "RxUnicastPkts" },
+       { 4, 0x98, "RxMulticastPkts" },
+       { 4, 0x9c, "RxBroadcastPkts" },
+       { 4, 0xa0, "RxSAChanges" },
+       { 4, 0xa4, "RxFragments" },
+       { 4, 0xa8, "RxJumboPkt" },
+       { 4, 0xac, "RxSymblErr" },
+       { 4, 0xb0, "InRangeErrCount" },
+       { 4, 0xb4, "OutRangeErrCount" },
+       { 4, 0xb8, "EEELpiEvent" },
+       { 4, 0xbc, "EEELpiDuration" },
+       { 4, 0xc0, "RxDiscard" },
+       { 4, 0xc8, "TxQPKTQ6" },
+       { 4, 0xcc, "TxQPKTQ7" },
+       { 4, 0xd0, "TxPkts64Octets" },
+       { 4, 0xd4, "TxPkts65to127Octets" },
+       { 4, 0xd8, "TxPkts128to255Octets" },
+       { 4, 0xdc, "TxPkts256to511Ocets" },
+       { 4, 0xe0, "TxPkts512to1023Ocets" },
+       { 4, 0xe4, "TxPkts1024toMaxPktOcets" },
+};
+
+#define B53_MIBS_58XX_SIZE     ARRAY_SIZE(b53_mibs_58xx)
+
 static int b53_do_vlan_op(struct b53_device *dev, u8 op)
 {
        unsigned int i;
@@ -418,7 +477,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid)
 
 static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        unsigned int i;
        u16 pvlan;
 
@@ -436,7 +495,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 static int b53_enable_port(struct dsa_switch *ds, int port,
                           struct phy_device *phy)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        unsigned int cpu_port = dev->cpu_port;
        u16 pvlan;
 
@@ -461,7 +520,7 @@ static int b53_enable_port(struct dsa_switch *ds, int port,
 static void b53_disable_port(struct dsa_switch *ds, int port,
                             struct phy_device *phy)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        u8 reg;
 
        /* Disable Tx/Rx for the port */
@@ -570,7 +629,7 @@ static int b53_switch_reset(struct b53_device *dev)
 
 static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg)
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
        u16 value = 0;
        int ret;
 
@@ -585,7 +644,7 @@ static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg)
 
 static int b53_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
 
        if (priv->ops->phy_write16)
                return priv->ops->phy_write16(priv, addr, reg, val);
@@ -635,6 +694,8 @@ static const struct b53_mib_desc *b53_get_mib(struct b53_device *dev)
                return b53_mibs_65;
        else if (is63xx(dev))
                return b53_mibs_63xx;
+       else if (is58xx(dev))
+               return b53_mibs_58xx;
        else
                return b53_mibs;
 }
@@ -645,13 +706,15 @@ static unsigned int b53_get_mib_size(struct b53_device *dev)
                return B53_MIBS_65_SIZE;
        else if (is63xx(dev))
                return B53_MIBS_63XX_SIZE;
+       else if (is58xx(dev))
+               return B53_MIBS_58XX_SIZE;
        else
                return B53_MIBS_SIZE;
 }
 
 static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        const struct b53_mib_desc *mibs = b53_get_mib(dev);
        unsigned int mib_size = b53_get_mib_size(dev);
        unsigned int i;
@@ -664,7 +727,7 @@ static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
 static void b53_get_ethtool_stats(struct dsa_switch *ds, int port,
                                  uint64_t *data)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        const struct b53_mib_desc *mibs = b53_get_mib(dev);
        unsigned int mib_size = b53_get_mib_size(dev);
        const struct b53_mib_desc *s;
@@ -696,7 +759,7 @@ static void b53_get_ethtool_stats(struct dsa_switch *ds, int port,
 
 static int b53_get_sset_count(struct dsa_switch *ds)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
 
        return b53_get_mib_size(dev);
 }
@@ -708,7 +771,7 @@ static int b53_set_addr(struct dsa_switch *ds, u8 *addr)
 
 static int b53_setup(struct dsa_switch *ds)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        unsigned int port;
        int ret;
 
@@ -739,7 +802,7 @@ static int b53_setup(struct dsa_switch *ds)
 static void b53_adjust_link(struct dsa_switch *ds, int port,
                            struct phy_device *phydev)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        u8 rgmii_ctrl = 0, reg = 0, off;
 
        if (!phy_is_pseudo_fixed_link(phydev))
@@ -873,7 +936,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
                            const struct switchdev_obj_port_vlan *vlan,
                            struct switchdev_trans *trans)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
 
        if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0)
                return -EOPNOTSUPP;
@@ -890,7 +953,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port,
                         const struct switchdev_obj_port_vlan *vlan,
                         struct switchdev_trans *trans)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
        bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
        unsigned int cpu_port = dev->cpu_port;
@@ -924,7 +987,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port,
 static int b53_vlan_del(struct dsa_switch *ds, int port,
                        const struct switchdev_obj_port_vlan *vlan)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
        unsigned int cpu_port = dev->cpu_port;
        struct b53_vlan *vl;
@@ -970,7 +1033,7 @@ static int b53_vlan_dump(struct dsa_switch *ds, int port,
                         struct switchdev_obj_port_vlan *vlan,
                         int (*cb)(struct switchdev_obj *obj))
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        u16 vid, vid_start = 0, pvid;
        struct b53_vlan *vl;
        int err = 0;
@@ -1129,7 +1192,7 @@ static int b53_fdb_prepare(struct dsa_switch *ds, int port,
                           const struct switchdev_obj_port_fdb *fdb,
                           struct switchdev_trans *trans)
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
 
        /* 5325 and 5365 require some more massaging, but could
         * be supported eventually
@@ -1144,7 +1207,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port,
                        const struct switchdev_obj_port_fdb *fdb,
                        struct switchdev_trans *trans)
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
 
        if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
                pr_err("%s: failed to add MAC address\n", __func__);
@@ -1153,7 +1216,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port,
 static int b53_fdb_del(struct dsa_switch *ds, int port,
                       const struct switchdev_obj_port_fdb *fdb)
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
 
        return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
 }
@@ -1212,7 +1275,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port,
                        struct switchdev_obj_port_fdb *fdb,
                        int (*cb)(struct switchdev_obj *obj))
 {
-       struct b53_device *priv = ds_to_priv(ds);
+       struct b53_device *priv = ds->priv;
        struct net_device *dev = ds->ports[port].netdev;
        struct b53_arl_entry results[2];
        unsigned int count = 0;
@@ -1251,10 +1314,22 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port,
 static int b53_br_join(struct dsa_switch *ds, int port,
                       struct net_device *bridge)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
+       s8 cpu_port = ds->dst->cpu_port;
        u16 pvlan, reg;
        unsigned int i;
 
+       /* Make this port leave the "join all VLANs" group since we will
+        * have proper VLAN entries from now on
+        */
+       if (is58xx(dev)) {
+               b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, &reg);
+               reg &= ~BIT(port);
+               if ((reg & BIT(cpu_port)) == BIT(cpu_port))
+                       reg &= ~BIT(cpu_port);
+               b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg);
+       }
+
        dev->ports[port].bridge_dev = bridge;
        b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan);
 
@@ -1284,9 +1359,10 @@ static int b53_br_join(struct dsa_switch *ds, int port,
 
 static void b53_br_leave(struct dsa_switch *ds, int port)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        struct net_device *bridge = dev->ports[port].bridge_dev;
        struct b53_vlan *vl = &dev->vlans[0];
+       s8 cpu_port = ds->dst->cpu_port;
        unsigned int i;
        u16 pvlan, reg, pvid;
 
@@ -1316,16 +1392,25 @@ static void b53_br_leave(struct dsa_switch *ds, int port)
        else
                pvid = 0;
 
-       b53_get_vlan_entry(dev, pvid, vl);
-       vl->members |= BIT(port) | BIT(dev->cpu_port);
-       vl->untag |= BIT(port) | BIT(dev->cpu_port);
-       b53_set_vlan_entry(dev, pvid, vl);
+       /* Make this port join all VLANs without VLAN entries */
+       if (is58xx(dev)) {
+               b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, &reg);
+               reg |= BIT(port);
+               if (!(reg & BIT(cpu_port)))
+                       reg |= BIT(cpu_port);
+               b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg);
+       } else {
+               b53_get_vlan_entry(dev, pvid, vl);
+               vl->members |= BIT(port) | BIT(dev->cpu_port);
+               vl->untag |= BIT(port) | BIT(dev->cpu_port);
+               b53_set_vlan_entry(dev, pvid, vl);
+       }
 }
 
 static void b53_br_set_stp_state(struct dsa_switch *ds, int port,
                                 u8 state)
 {
-       struct b53_device *dev = ds_to_priv(ds);
+       struct b53_device *dev = ds->priv;
        u8 hw_state, cur_hw_state;
        u8 reg;
 
@@ -1373,8 +1458,13 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port,
        b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg);
 }
 
-static struct dsa_switch_driver b53_switch_ops = {
-       .tag_protocol           = DSA_TAG_PROTO_NONE,
+static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds)
+{
+       return DSA_TAG_PROTO_NONE;
+}
+
+static struct dsa_switch_ops b53_switch_ops = {
+       .get_tag_protocol       = b53_get_tag_protocol,
        .setup                  = b53_setup,
        .set_addr               = b53_set_addr,
        .get_strings            = b53_get_strings,
@@ -1593,11 +1683,22 @@ static const struct b53_chip_data b53_switch_chips[] = {
                .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
                .jumbo_size_reg = B53_JUMBO_MAX_SIZE,
        },
+       {
+               .chip_id = BCM7445_DEVICE_ID,
+               .dev_name = "BCM7445",
+               .vlans  = 4096,
+               .enabled_ports = 0x1ff,
+               .arl_entries = 4,
+               .cpu_port = B53_CPU_PORT,
+               .vta_regs = B53_VTA_REGS,
+               .duplex_reg = B53_DUPLEX_STAT_GE,
+               .jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+               .jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+       },
 };
 
 static int b53_switch_init(struct b53_device *dev)
 {
-       struct dsa_switch *ds = dev->ds;
        unsigned int i;
        int ret;
 
@@ -1613,7 +1714,6 @@ static int b53_switch_init(struct b53_device *dev)
                        dev->vta_regs[1] = chip->vta_regs[1];
                        dev->vta_regs[2] = chip->vta_regs[2];
                        dev->jumbo_pm_reg = chip->jumbo_pm_reg;
-                       ds->drv = &b53_switch_ops;
                        dev->cpu_port = chip->cpu_port;
                        dev->num_vlans = chip->vlans;
                        dev->num_arl_entries = chip->arl_entries;
@@ -1701,6 +1801,7 @@ struct b53_device *b53_switch_alloc(struct device *base,
        dev->ds = ds;
        dev->priv = priv;
        dev->ops = ops;
+       ds->ops = &b53_switch_ops;
        mutex_init(&dev->reg_mutex);
        mutex_init(&dev->stats_mutex);
 
index d268493..f192a67 100644
@@ -60,6 +60,7 @@ enum {
        BCM53018_DEVICE_ID = 0x53018,
        BCM53019_DEVICE_ID = 0x53019,
        BCM58XX_DEVICE_ID = 0x5800,
+       BCM7445_DEVICE_ID = 0x7445,
 };
 
 #define B53_N_PORTS    9
@@ -174,6 +175,12 @@ static inline int is5301x(struct b53_device *dev)
                dev->chip_id == BCM53019_DEVICE_ID;
 }
 
+static inline int is58xx(struct b53_device *dev)
+{
+       return dev->chip_id == BCM58XX_DEVICE_ID ||
+               dev->chip_id == BCM7445_DEVICE_ID;
+}
+
 #define B53_CPU_PORT_25        5
 #define B53_CPU_PORT   8
 
@@ -365,7 +372,6 @@ static inline void b53_arl_from_entry(u64 *mac_vid, u32 *fwd_entry,
 
 #ifdef CONFIG_BCM47XX
 
-#include <linux/version.h>
 #include <linux/bcm47xx_nvram.h>
 #include <bcm47xx_board.h>
 static inline int b53_switch_get_reset_gpio(struct b53_device *dev)
index a0b453e..dac0af4 100644
 /* Port VLAN mask (16 bit) IMP port is always 8, also on 5325 & co */
 #define B53_PVLAN_PORT_MASK(i)         ((i) * 2)
 
+/* Join all VLANs register (16 bit) */
+#define B53_JOIN_ALL_VLAN_EN           0x50
+
 /*************************************************************************
  * 802.1Q Page Registers
  *************************************************************************/
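The register added above backs the 58xx bridge join/leave hunks earlier in this file set: B53_JOIN_ALL_VLAN_EN is a per-port bitmask that is read, has the port's bit set or cleared (with the CPU port kept in step), and is written back. A self-contained sketch of that 16-bit read-modify-write; the CPU-port handling is simplified relative to the patch:

        #include <stdint.h>
        #include <stdio.h>

        #define BIT(n) (1u << (n))

        static uint16_t join_all_vlan_en;       /* stand-in for the register */

        static void set_join_all(int port, int cpu_port, int join)
        {
                uint16_t reg = join_all_vlan_en;        /* read */
                if (join) {                             /* port left a bridge */
                        reg |= BIT(port);
                        reg |= BIT(cpu_port);
                } else {                                /* port joined a bridge */
                        reg &= ~BIT(port);
                }
                join_all_vlan_en = reg;                 /* write back */
        }

        int main(void)
        {
                set_join_all(3, 8, 1);  /* port 3, CPU port 8 */
                printf("JOIN_ALL_VLAN_EN = 0x%04x\n", join_all_vlan_en); /* 0x0108 */
                return 0;
        }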
index b2b8387..e218887 100644
 #include <linux/brcmphy.h>
 #include <linux/etherdevice.h>
 #include <net/switchdev.h>
+#include <linux/platform_data/b53.h>
 
 #include "bcm_sf2.h"
 #include "bcm_sf2_regs.h"
+#include "b53/b53_priv.h"
+#include "b53/b53_regs.h"
 
-/* String, offset, and register size in bytes if different from 4 bytes */
-static const struct bcm_sf2_hw_stats bcm_sf2_mib[] = {
-       { "TxOctets",           0x000, 8        },
-       { "TxDropPkts",         0x020           },
-       { "TxQPKTQ0",           0x030           },
-       { "TxBroadcastPkts",    0x040           },
-       { "TxMulticastPkts",    0x050           },
-       { "TxUnicastPKts",      0x060           },
-       { "TxCollisions",       0x070           },
-       { "TxSingleCollision",  0x080           },
-       { "TxMultipleCollision", 0x090          },
-       { "TxDeferredCollision", 0x0a0          },
-       { "TxLateCollision",    0x0b0           },
-       { "TxExcessiveCollision", 0x0c0         },
-       { "TxFrameInDisc",      0x0d0           },
-       { "TxPausePkts",        0x0e0           },
-       { "TxQPKTQ1",           0x0f0           },
-       { "TxQPKTQ2",           0x100           },
-       { "TxQPKTQ3",           0x110           },
-       { "TxQPKTQ4",           0x120           },
-       { "TxQPKTQ5",           0x130           },
-       { "RxOctets",           0x140, 8        },
-       { "RxUndersizePkts",    0x160           },
-       { "RxPausePkts",        0x170           },
-       { "RxPkts64Octets",     0x180           },
-       { "RxPkts65to127Octets", 0x190          },
-       { "RxPkts128to255Octets", 0x1a0         },
-       { "RxPkts256to511Octets", 0x1b0         },
-       { "RxPkts512to1023Octets", 0x1c0        },
-       { "RxPkts1024toMaxPktsOctets", 0x1d0    },
-       { "RxOversizePkts",     0x1e0           },
-       { "RxJabbers",          0x1f0           },
-       { "RxAlignmentErrors",  0x200           },
-       { "RxFCSErrors",        0x210           },
-       { "RxGoodOctets",       0x220, 8        },
-       { "RxDropPkts",         0x240           },
-       { "RxUnicastPkts",      0x250           },
-       { "RxMulticastPkts",    0x260           },
-       { "RxBroadcastPkts",    0x270           },
-       { "RxSAChanges",        0x280           },
-       { "RxFragments",        0x290           },
-       { "RxJumboPkt",         0x2a0           },
-       { "RxSymblErr",         0x2b0           },
-       { "InRangeErrCount",    0x2c0           },
-       { "OutRangeErrCount",   0x2d0           },
-       { "EEELpiEvent",        0x2e0           },
-       { "EEELpiDuration",     0x2f0           },
-       { "RxDiscard",          0x300, 8        },
-       { "TxQPKTQ6",           0x320           },
-       { "TxQPKTQ7",           0x330           },
-       { "TxPkts64Octets",     0x340           },
-       { "TxPkts65to127Octets", 0x350          },
-       { "TxPkts128to255Octets", 0x360         },
-       { "TxPkts256to511Ocets", 0x370          },
-       { "TxPkts512to1023Ocets", 0x380         },
-       { "TxPkts1024toMaxPktOcets", 0x390      },
-};
-
-#define BCM_SF2_STATS_SIZE     ARRAY_SIZE(bcm_sf2_mib)
-
-static void bcm_sf2_sw_get_strings(struct dsa_switch *ds,
-                                  int port, uint8_t *data)
-{
-       unsigned int i;
-
-       for (i = 0; i < BCM_SF2_STATS_SIZE; i++)
-               memcpy(data + i * ETH_GSTRING_LEN,
-                      bcm_sf2_mib[i].string, ETH_GSTRING_LEN);
-}
-
-static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds,
-                                        int port, uint64_t *data)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       const struct bcm_sf2_hw_stats *s;
-       unsigned int i;
-       u64 val = 0;
-       u32 offset;
-
-       mutex_lock(&priv->stats_mutex);
-
-       /* Now fetch the per-port counters */
-       for (i = 0; i < BCM_SF2_STATS_SIZE; i++) {
-               s = &bcm_sf2_mib[i];
-
-               /* Do a latched 64-bit read if needed */
-               offset = s->reg + CORE_P_MIB_OFFSET(port);
-               if (s->sizeof_stat == 8)
-                       val = core_readq(priv, offset);
-               else
-                       val = core_readl(priv, offset);
-
-               data[i] = (u64)val;
-       }
-
-       mutex_unlock(&priv->stats_mutex);
-}
-
-static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds)
-{
-       return BCM_SF2_STATS_SIZE;
-}
-
-static const char *bcm_sf2_sw_drv_probe(struct device *dsa_dev,
-                                       struct device *host_dev, int sw_addr,
-                                       void **_priv)
+static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds)
 {
-       struct bcm_sf2_priv *priv;
-
-       priv = devm_kzalloc(dsa_dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return NULL;
-       *_priv = priv;
-
-       return "Broadcom Starfighter 2";
+       return DSA_TAG_PROTO_BRCM;
 }
 
 static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        unsigned int i;
        u32 reg;
 
@@ -172,7 +63,7 @@ static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 
 static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 reg, val;
 
        /* Enable the port memories */
@@ -237,7 +128,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
 
 static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 reg;
 
        reg = core_readl(priv, CORE_EEE_EN_CTRL);
@@ -250,7 +141,7 @@ static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
 
 static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 reg;
 
        reg = reg_readl(priv, REG_SPHY_CNTRL);
@@ -324,7 +215,7 @@ static inline void bcm_sf2_port_intr_disable(struct bcm_sf2_priv *priv,
 static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
                              struct phy_device *phy)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        s8 cpu_port = ds->dst[ds->index].cpu_port;
        u32 reg;
 
@@ -365,7 +256,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
        reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
        reg &= ~PORT_VLAN_CTRL_MASK;
        reg |= (1 << port);
-       reg |= priv->port_sts[port].vlan_ctl_mask;
+       reg |= priv->dev->ports[port].vlan_ctl_mask;
        core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port));
 
        bcm_sf2_imp_vlan_setup(ds, cpu_port);
@@ -380,7 +271,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
 static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
                                 struct phy_device *phy)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 off, reg;
 
        if (priv->wol_ports_mask & (1 << port))
@@ -412,7 +303,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
 static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
                            struct phy_device *phy)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_eee *p = &priv->port_sts[port].eee;
        int ret;
 
@@ -430,7 +321,7 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
 static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
                              struct ethtool_eee *e)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_eee *p = &priv->port_sts[port].eee;
        u32 reg;
 
@@ -445,7 +336,7 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
                              struct phy_device *phydev,
                              struct ethtool_eee *e)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_eee *p = &priv->port_sts[port].eee;
 
        p->eee_enabled = e->eee_enabled;
@@ -461,469 +352,6 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
        return 0;
 }
 
-static int bcm_sf2_fast_age_op(struct bcm_sf2_priv *priv)
-{
-       unsigned int timeout = 1000;
-       u32 reg;
-
-       reg = core_readl(priv, CORE_FAST_AGE_CTRL);
-       reg |= EN_AGE_PORT | EN_AGE_VLAN | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
-       core_writel(priv, reg, CORE_FAST_AGE_CTRL);
-
-       do {
-               reg = core_readl(priv, CORE_FAST_AGE_CTRL);
-               if (!(reg & FAST_AGE_STR_DONE))
-                       break;
-
-               cpu_relax();
-       } while (timeout--);
-
-       if (!timeout)
-               return -ETIMEDOUT;
-
-       core_writel(priv, 0, CORE_FAST_AGE_CTRL);
-
-       return 0;
-}
-
-/* Fast-ageing of ARL entries for a given port, equivalent to an ARL
- * flush for that port.
- */
-static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-       core_writel(priv, port, CORE_FAST_AGE_PORT);
-
-       return bcm_sf2_fast_age_op(priv);
-}
-
-static int bcm_sf2_sw_fast_age_vlan(struct bcm_sf2_priv *priv, u16 vid)
-{
-       core_writel(priv, vid, CORE_FAST_AGE_VID);
-
-       return bcm_sf2_fast_age_op(priv);
-}
-
-static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv)
-{
-       unsigned int timeout = 10;
-       u32 reg;
-
-       do {
-               reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL);
-               if (!(reg & ARLA_VTBL_STDN))
-                       return 0;
-
-               usleep_range(1000, 2000);
-       } while (timeout--);
-
-       return -ETIMEDOUT;
-}
-
-static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op)
-{
-       core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL);
-
-       return bcm_sf2_vlan_op_wait(priv);
-}
-
-static void bcm_sf2_set_vlan_entry(struct bcm_sf2_priv *priv, u16 vid,
-                                  struct bcm_sf2_vlan *vlan)
-{
-       int ret;
-
-       core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR);
-       core_writel(priv, vlan->untag << UNTAG_MAP_SHIFT | vlan->members,
-                   CORE_ARLA_VTBL_ENTRY);
-
-       ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_WRITE);
-       if (ret)
-               pr_err("failed to write VLAN entry\n");
-}
-
-static int bcm_sf2_get_vlan_entry(struct bcm_sf2_priv *priv, u16 vid,
-                                 struct bcm_sf2_vlan *vlan)
-{
-       u32 entry;
-       int ret;
-
-       core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR);
-
-       ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_READ);
-       if (ret)
-               return ret;
-
-       entry = core_readl(priv, CORE_ARLA_VTBL_ENTRY);
-       vlan->members = entry & FWD_MAP_MASK;
-       vlan->untag = (entry >> UNTAG_MAP_SHIFT) & UNTAG_MAP_MASK;
-
-       return 0;
-}
-
-static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
-                             struct net_device *bridge)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       s8 cpu_port = ds->dst->cpu_port;
-       unsigned int i;
-       u32 reg, p_ctl;
-
-       /* Make this port leave the all-VLANs join, since we will have proper
-        * VLAN entries from now on
-        */
-       reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN);
-       reg &= ~BIT(port);
-       if ((reg & BIT(cpu_port)) == BIT(cpu_port))
-               reg &= ~BIT(cpu_port);
-       core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN);
-
-       priv->port_sts[port].bridge_dev = bridge;
-       p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
-
-       for (i = 0; i < priv->hw_params.num_ports; i++) {
-               if (priv->port_sts[i].bridge_dev != bridge)
-                       continue;
-
-               /* Add this local port to the remote port VLAN control
-                * membership and update the remote port bitmask
-                */
-               reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-               reg |= 1 << port;
-               core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-               priv->port_sts[i].vlan_ctl_mask = reg;
-
-               p_ctl |= 1 << i;
-       }
-
-       /* Configure the local port VLAN control membership to include
-        * remote ports and update the local port bitmask
-        */
-       core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-       priv->port_sts[port].vlan_ctl_mask = p_ctl;
-
-       return 0;
-}
-
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       struct net_device *bridge = priv->port_sts[port].bridge_dev;
-       s8 cpu_port = ds->dst->cpu_port;
-       unsigned int i;
-       u32 reg, p_ctl;
-
-       p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
-
-       for (i = 0; i < priv->hw_params.num_ports; i++) {
-               /* Don't touch the remaining ports */
-               if (priv->port_sts[i].bridge_dev != bridge)
-                       continue;
-
-               reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-               reg &= ~(1 << port);
-               core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-               priv->port_sts[port].vlan_ctl_mask = reg;
-
-               /* Prevent self-removal to preserve isolation */
-               if (port != i)
-                       p_ctl &= ~(1 << i);
-       }
-
-       core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-       priv->port_sts[port].vlan_ctl_mask = p_ctl;
-       priv->port_sts[port].bridge_dev = NULL;
-
-       /* Make this port join all VLANs without VLAN entries */
-       reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN);
-       reg |= BIT(port);
-       if (!(reg & BIT(cpu_port)))
-               reg |= BIT(cpu_port);
-       core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN);
-}
-
-static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
-                                       u8 state)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       u8 hw_state, cur_hw_state;
-       u32 reg;
-
-       reg = core_readl(priv, CORE_G_PCTL_PORT(port));
-       cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
-
-       switch (state) {
-       case BR_STATE_DISABLED:
-               hw_state = G_MISTP_DIS_STATE;
-               break;
-       case BR_STATE_LISTENING:
-               hw_state = G_MISTP_LISTEN_STATE;
-               break;
-       case BR_STATE_LEARNING:
-               hw_state = G_MISTP_LEARN_STATE;
-               break;
-       case BR_STATE_FORWARDING:
-               hw_state = G_MISTP_FWD_STATE;
-               break;
-       case BR_STATE_BLOCKING:
-               hw_state = G_MISTP_BLOCK_STATE;
-               break;
-       default:
-               pr_err("%s: invalid STP state: %d\n", __func__, state);
-               return;
-       }
-
-       /* Fast-age ARL entries if we are moving a port from Learning or
-        * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
-        * state (hw_state)
-        */
-       if (cur_hw_state != hw_state) {
-               if (cur_hw_state >= G_MISTP_LEARN_STATE &&
-                   hw_state <= G_MISTP_LISTEN_STATE) {
-                       if (bcm_sf2_sw_fast_age_port(ds, port)) {
-                               pr_err("%s: fast-ageing failed\n", __func__);
-                               return;
-                       }
-               }
-       }
-
-       reg = core_readl(priv, CORE_G_PCTL_PORT(port));
-       reg &= ~(G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
-       reg |= hw_state;
-       core_writel(priv, reg, CORE_G_PCTL_PORT(port));
-}
-
-/* Address Resolution Logic routines */
-static int bcm_sf2_arl_op_wait(struct bcm_sf2_priv *priv)
-{
-       unsigned int timeout = 10;
-       u32 reg;
-
-       do {
-               reg = core_readl(priv, CORE_ARLA_RWCTL);
-               if (!(reg & ARL_STRTDN))
-                       return 0;
-
-               usleep_range(1000, 2000);
-       } while (timeout--);
-
-       return -ETIMEDOUT;
-}
-
-static int bcm_sf2_arl_rw_op(struct bcm_sf2_priv *priv, unsigned int op)
-{
-       u32 cmd;
-
-       if (op > ARL_RW)
-               return -EINVAL;
-
-       cmd = core_readl(priv, CORE_ARLA_RWCTL);
-       cmd &= ~IVL_SVL_SELECT;
-       cmd |= ARL_STRTDN;
-       if (op)
-               cmd |= ARL_RW;
-       else
-               cmd &= ~ARL_RW;
-       core_writel(priv, cmd, CORE_ARLA_RWCTL);
-
-       return bcm_sf2_arl_op_wait(priv);
-}
-
-static int bcm_sf2_arl_read(struct bcm_sf2_priv *priv, u64 mac,
-                           u16 vid, struct bcm_sf2_arl_entry *ent, u8 *idx,
-                           bool is_valid)
-{
-       unsigned int i;
-       int ret;
-
-       ret = bcm_sf2_arl_op_wait(priv);
-       if (ret)
-               return ret;
-
-       /* Read the 4 bins */
-       for (i = 0; i < 4; i++) {
-               u64 mac_vid;
-               u32 fwd_entry;
-
-               mac_vid = core_readq(priv, CORE_ARLA_MACVID_ENTRY(i));
-               fwd_entry = core_readl(priv, CORE_ARLA_FWD_ENTRY(i));
-               bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry);
-
-               if (ent->is_valid && is_valid) {
-                       *idx = i;
-                       return 0;
-               }
-
-               /* This is the MAC we just deleted */
-               if (!is_valid && (mac_vid & mac))
-                       return 0;
-       }
-
-       return -ENOENT;
-}
-
-static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int op, int port,
-                         const unsigned char *addr, u16 vid, bool is_valid)
-{
-       struct bcm_sf2_arl_entry ent;
-       u32 fwd_entry;
-       u64 mac, mac_vid = 0;
-       u8 idx = 0;
-       int ret;
-
-       /* Convert the array into a 64-bit MAC */
-       mac = bcm_sf2_mac_to_u64(addr);
-
-       /* Perform a read for the given MAC and VID */
-       core_writeq(priv, mac, CORE_ARLA_MAC);
-       core_writel(priv, vid, CORE_ARLA_VID);
-
-       /* Issue a read operation for this MAC */
-       ret = bcm_sf2_arl_rw_op(priv, 1);
-       if (ret)
-               return ret;
-
-       ret = bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid);
-       /* If this is a read, just finish now */
-       if (op)
-               return ret;
-
-       /* We could not find a matching MAC, so reset to a new entry */
-       if (ret) {
-               fwd_entry = 0;
-               idx = 0;
-       }
-
-       memset(&ent, 0, sizeof(ent));
-       ent.port = port;
-       ent.is_valid = is_valid;
-       ent.vid = vid;
-       ent.is_static = true;
-       memcpy(ent.mac, addr, ETH_ALEN);
-       bcm_sf2_arl_from_entry(&mac_vid, &fwd_entry, &ent);
-
-       core_writeq(priv, mac_vid, CORE_ARLA_MACVID_ENTRY(idx));
-       core_writel(priv, fwd_entry, CORE_ARLA_FWD_ENTRY(idx));
-
-       ret = bcm_sf2_arl_rw_op(priv, 0);
-       if (ret)
-               return ret;
-
-       /* Re-read the entry to check */
-       return bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid);
-}
-
-static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port,
-                                 const struct switchdev_obj_port_fdb *fdb,
-                                 struct switchdev_trans *trans)
-{
-       /* We do not need to do anything specific here yet */
-       return 0;
-}
-
-static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port,
-                              const struct switchdev_obj_port_fdb *fdb,
-                              struct switchdev_trans *trans)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-       if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
-               pr_err("%s: failed to add MAC address\n", __func__);
-}
-
-static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port,
-                             const struct switchdev_obj_port_fdb *fdb)
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-       return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
-}
-
-static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv)
-{
-       unsigned timeout = 1000;
-       u32 reg;
-
-       do {
-               reg = core_readl(priv, CORE_ARLA_SRCH_CTL);
-               if (!(reg & ARLA_SRCH_STDN))
-                       return 0;
-
-               if (reg & ARLA_SRCH_VLID)
-                       return 0;
-
-               usleep_range(1000, 2000);
-       } while (timeout--);
-
-       return -ETIMEDOUT;
-}
-
-static void bcm_sf2_arl_search_rd(struct bcm_sf2_priv *priv, u8 idx,
-                                 struct bcm_sf2_arl_entry *ent)
-{
-       u64 mac_vid;
-       u32 fwd_entry;
-
-       mac_vid = core_readq(priv, CORE_ARLA_SRCH_RSLT_MACVID(idx));
-       fwd_entry = core_readl(priv, CORE_ARLA_SRCH_RSLT(idx));
-       bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry);
-}
-
-static int bcm_sf2_sw_fdb_copy(struct net_device *dev, int port,
-                              const struct bcm_sf2_arl_entry *ent,
-                              struct switchdev_obj_port_fdb *fdb,
-                              int (*cb)(struct switchdev_obj *obj))
-{
-       if (!ent->is_valid)
-               return 0;
-
-       if (port != ent->port)
-               return 0;
-
-       ether_addr_copy(fdb->addr, ent->mac);
-       fdb->vid = ent->vid;
-       fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE;
-
-       return cb(&fdb->obj);
-}
-
-static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port,
-                              struct switchdev_obj_port_fdb *fdb,
-                              int (*cb)(struct switchdev_obj *obj))
-{
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       struct net_device *dev = ds->ports[port].netdev;
-       struct bcm_sf2_arl_entry results[2];
-       unsigned int count = 0;
-       int ret;
-
-       /* Start search operation */
-       core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL);
-
-       do {
-               ret = bcm_sf2_arl_search_wait(priv);
-               if (ret)
-                       return ret;
-
-               /* Read both entries and pass their values back */
-               bcm_sf2_arl_search_rd(priv, 0, &results[0]);
-               ret = bcm_sf2_sw_fdb_copy(dev, port, &results[0], fdb, cb);
-               if (ret)
-                       return ret;
-
-               bcm_sf2_arl_search_rd(priv, 1, &results[1]);
-               ret = bcm_sf2_sw_fdb_copy(dev, port, &results[1], fdb, cb);
-               if (ret)
-                       return ret;
-
-               if (!results[0].is_valid && !results[1].is_valid)
-                       break;
-
-       } while (count++ < CORE_ARLA_NUM_ENTRIES);
-
-       return 0;
-}
-
 static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr,
                               int regnum, u16 val)
 {
@@ -1036,12 +464,10 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
 
 static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 {
-       intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+       intrl2_0_mask_set(priv, 0xffffffff);
        intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
-       intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
-       intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+       intrl2_1_mask_set(priv, 0xffffffff);
        intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
-       intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
 }
 
 static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
@@ -1082,7 +508,7 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
 
 static int bcm_sf2_mdio_register(struct dsa_switch *ds)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct device_node *dn;
        static int index;
        int err;
@@ -1146,14 +572,9 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv)
                of_node_put(priv->master_mii_dn);
 }
 
-static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr)
-{
-       return 0;
-}
-
 static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 
        /* The BCM7xxx PHY driver expects to find the integrated PHY revision
         * in bits 15:8 and the patch level in bits 7:0 which is exactly what
@@ -1166,7 +587,7 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
 static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
                                   struct phy_device *phydev)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 id_mode_dis = 0, port_mode;
        const char *str = NULL;
        u32 reg;
@@ -1246,7 +667,7 @@ force_link:
 static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
                                         struct fixed_phy_status *status)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        u32 duplex, pause;
        u32 reg;
 
@@ -1298,7 +719,7 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
 
 static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        unsigned int port;
 
        bcm_sf2_intr_disable(priv);
@@ -1318,7 +739,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
 
 static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        unsigned int port;
        int ret;
 
@@ -1345,7 +766,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
                               struct ethtool_wolinfo *wol)
 {
        struct net_device *p = ds->dst[ds->index].master_netdev;
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        struct ethtool_wolinfo pwol;
 
        /* Get the parent device WoL settings */
@@ -1368,7 +789,7 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
                              struct ethtool_wolinfo *wol)
 {
        struct net_device *p = ds->dst[ds->index].master_netdev;
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        s8 cpu_port = ds->dst[ds->index].cpu_port;
        struct ethtool_wolinfo pwol;
 
@@ -1393,43 +814,32 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
        return p->ethtool_ops->set_wol(p, wol);
 }
 
-static void bcm_sf2_enable_vlan(struct bcm_sf2_priv *priv, bool enable)
+static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv)
 {
-       u32 mgmt, vc0, vc1, vc4, vc5;
+       unsigned int timeout = 10;
+       u32 reg;
 
-       mgmt = core_readl(priv, CORE_SWMODE);
-       vc0 = core_readl(priv, CORE_VLAN_CTRL0);
-       vc1 = core_readl(priv, CORE_VLAN_CTRL1);
-       vc4 = core_readl(priv, CORE_VLAN_CTRL4);
-       vc5 = core_readl(priv, CORE_VLAN_CTRL5);
+       do {
+               reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL);
+               if (!(reg & ARLA_VTBL_STDN))
+                       return 0;
 
-       mgmt &= ~SW_FWDG_MODE;
+               usleep_range(1000, 2000);
+       } while (timeout--);
 
-       if (enable) {
-               vc0 |= VLAN_EN | VLAN_LEARN_MODE_IVL;
-               vc1 |= EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP;
-               vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT);
-               vc4 |= INGR_VID_CHK_DROP;
-               vc5 |= DROP_VTABLE_MISS | EN_VID_FFF_FWD;
-       } else {
-               vc0 &= ~(VLAN_EN | VLAN_LEARN_MODE_IVL);
-               vc1 &= ~(EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP);
-               vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT);
-               vc5 &= ~(DROP_VTABLE_MISS | EN_VID_FFF_FWD);
-               vc4 |= INGR_VID_CHK_VID_VIOL_IMP;
-       }
+       return -ETIMEDOUT;
+}
+
+static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op)
+{
+       core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL);
 
-       core_writel(priv, vc0, CORE_VLAN_CTRL0);
-       core_writel(priv, vc1, CORE_VLAN_CTRL1);
-       core_writel(priv, 0, CORE_VLAN_CTRL3);
-       core_writel(priv, vc4, CORE_VLAN_CTRL4);
-       core_writel(priv, vc5, CORE_VLAN_CTRL5);
-       core_writel(priv, mgmt, CORE_SWMODE);
+       return bcm_sf2_vlan_op_wait(priv);
 }
 
 static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
        unsigned int port;
 
        /* Clear all VLANs */
@@ -1443,162 +853,199 @@ static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds)
        }
 }
 
-static int bcm_sf2_sw_vlan_filtering(struct dsa_switch *ds, int port,
-                                    bool vlan_filtering)
+static int bcm_sf2_sw_setup(struct dsa_switch *ds)
 {
+       struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+       unsigned int port;
+
+       /* Enable all valid ports and disable those unused */
+       for (port = 0; port < priv->hw_params.num_ports; port++) {
+               /* IMP port receives special treatment */
+               if ((1 << port) & ds->enabled_port_mask)
+                       bcm_sf2_port_setup(ds, port, NULL);
+               else if (dsa_is_cpu_port(ds, port))
+                       bcm_sf2_imp_setup(ds, port);
+               else
+                       bcm_sf2_port_disable(ds, port, NULL);
+       }
+
+       bcm_sf2_sw_configure_vlan(ds);
+
        return 0;
 }
 
-static int bcm_sf2_sw_vlan_prepare(struct dsa_switch *ds, int port,
-                                  const struct switchdev_obj_port_vlan *vlan,
-                                  struct switchdev_trans *trans)
+/* The SWITCH_CORE register space is managed by b53 but operates on a page +
+ * register basis, so we need to translate that into an address that the
+ * bus glue understands.
+ */
+#define SF2_PAGE_REG_MKADDR(page, reg) ((page) << 10 | (reg) << 2)
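
The packing here is worth spelling out: the 8-bit b53 page lands in bits 17:10 and the byte-sized register offset is scaled by 4 into bits 9:2, matching the 32-bit-aligned SWITCH_CORE map (note that the old CORE_P0_MIB_OFFSET of 0x8000, deleted below, is exactly page 0x20 << 10, which appears to line up with the b53 per-port MIB pages). A quick illustrative expansion, with made-up page/register values:

	/* Illustrative only: page 0x02, register 0x30 translates to
	 * SWITCH_CORE offset (0x02 << 10) | (0x30 << 2) = 0x8c0.
	 */
	u32 off = SF2_PAGE_REG_MKADDR(0x02, 0x30);	/* 0x8c0 */
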
+
+static int bcm_sf2_core_read8(struct b53_device *dev, u8 page, u8 reg,
+                             u8 *val)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       bcm_sf2_enable_vlan(priv, true);
+       *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
 
        return 0;
 }
 
-static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port,
-                               const struct switchdev_obj_port_vlan *vlan,
-                               struct switchdev_trans *trans)
+static int bcm_sf2_core_read16(struct b53_device *dev, u8 page, u8 reg,
+                              u16 *val)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-       bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
-       s8 cpu_port = ds->dst->cpu_port;
-       struct bcm_sf2_vlan *vl;
-       u16 vid;
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-               vl = &priv->vlans[vid];
+       *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
 
-               bcm_sf2_get_vlan_entry(priv, vid, vl);
+       return 0;
+}
 
-               vl->members |= BIT(port) | BIT(cpu_port);
-               if (untagged)
-                       vl->untag |= BIT(port) | BIT(cpu_port);
-               else
-                       vl->untag &= ~(BIT(port) | BIT(cpu_port));
+static int bcm_sf2_core_read32(struct b53_device *dev, u8 page, u8 reg,
+                              u32 *val)
+{
+       struct bcm_sf2_priv *priv = dev->priv;
 
-               bcm_sf2_set_vlan_entry(priv, vid, vl);
-               bcm_sf2_sw_fast_age_vlan(priv, vid);
-       }
+       *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
 
-       if (pvid) {
-               core_writel(priv, vlan->vid_end, CORE_DEFAULT_1Q_TAG_P(port));
-               core_writel(priv, vlan->vid_end,
-                           CORE_DEFAULT_1Q_TAG_P(cpu_port));
-               bcm_sf2_sw_fast_age_vlan(priv, vid);
-       }
+       return 0;
 }
 
-static int bcm_sf2_sw_vlan_del(struct dsa_switch *ds, int port,
-                              const struct switchdev_obj_port_vlan *vlan)
+static int bcm_sf2_core_read64(struct b53_device *dev, u8 page, u8 reg,
+                              u64 *val)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-       s8 cpu_port = ds->dst->cpu_port;
-       struct bcm_sf2_vlan *vl;
-       u16 vid, pvid;
-       int ret;
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port));
+       *val = core_readq(priv, SF2_PAGE_REG_MKADDR(page, reg));
 
-       for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-               vl = &priv->vlans[vid];
-
-               ret = bcm_sf2_get_vlan_entry(priv, vid, vl);
-               if (ret)
-                       return ret;
-
-               vl->members &= ~BIT(port);
-               if ((vl->members & BIT(cpu_port)) == BIT(cpu_port))
-                       vl->members = 0;
-               if (pvid == vid)
-                       pvid = 0;
-               if (untagged) {
-                       vl->untag &= ~BIT(port);
-                       if ((vl->untag & BIT(port)) == BIT(cpu_port))
-                               vl->untag = 0;
-               }
+       return 0;
+}
 
-               bcm_sf2_set_vlan_entry(priv, vid, vl);
-               bcm_sf2_sw_fast_age_vlan(priv, vid);
-       }
+static int bcm_sf2_core_write8(struct b53_device *dev, u8 page, u8 reg,
+                              u8 value)
+{
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(port));
-       core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(cpu_port));
-       bcm_sf2_sw_fast_age_vlan(priv, vid);
+       core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
 
        return 0;
 }
 
-static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port,
-                               struct switchdev_obj_port_vlan *vlan,
-                               int (*cb)(struct switchdev_obj *obj))
+static int bcm_sf2_core_write16(struct b53_device *dev, u8 page, u8 reg,
+                               u16 value)
 {
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       struct bcm_sf2_port_status *p = &priv->port_sts[port];
-       struct bcm_sf2_vlan *vl;
-       u16 vid, pvid;
-       int err = 0;
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port));
+       core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
 
-       for (vid = 0; vid < VLAN_N_VID; vid++) {
-               vl = &priv->vlans[vid];
+       return 0;
+}
 
-               if (!(vl->members & BIT(port)))
-                       continue;
+static int bcm_sf2_core_write32(struct b53_device *dev, u8 page, u8 reg,
+                               u32 value)
+{
+       struct bcm_sf2_priv *priv = dev->priv;
 
-               vlan->vid_begin = vlan->vid_end = vid;
-               vlan->flags = 0;
+       core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
 
-               if (vl->untag & BIT(port))
-                       vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-               if (p->pvid == vid)
-                       vlan->flags |= BRIDGE_VLAN_INFO_PVID;
+       return 0;
+}
 
-               err = cb(&vlan->obj);
-               if (err)
-                       break;
-       }
+static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg,
+                               u64 value)
+{
+       struct bcm_sf2_priv *priv = dev->priv;
 
-       return err;
+       core_writeq(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
+
+       return 0;
 }
 
-static int bcm_sf2_sw_setup(struct dsa_switch *ds)
+static struct b53_io_ops bcm_sf2_io_ops = {
+       .read8  = bcm_sf2_core_read8,
+       .read16 = bcm_sf2_core_read16,
+       .read32 = bcm_sf2_core_read32,
+       .read48 = bcm_sf2_core_read64,
+       .read64 = bcm_sf2_core_read64,
+       .write8 = bcm_sf2_core_write8,
+       .write16 = bcm_sf2_core_write16,
+       .write32 = bcm_sf2_core_write32,
+       .write48 = bcm_sf2_core_write64,
+       .write64 = bcm_sf2_core_write64,
+};
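
This ops table is how the bus-agnostic b53 core reaches the memory-mapped SWITCH_CORE space; since there is no native 48-bit MMIO accessor, read48/write48 are simply aliased to the 64-bit helpers. A minimal sketch of how a caller goes through the table (hypothetical helper name, assuming the b53 core keeps the table reachable as dev->ops):

	/* Hypothetical caller sketch: dispatching through the I/O ops lets
	 * the same core code work over MMIO, SPI, SRAB or MDIO back-ends.
	 */
	static int b53_example_read32(struct b53_device *dev, u8 page,
				      u8 reg, u32 *val)
	{
		return dev->ops->read32(dev, page, reg, val);
	}
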
+
+static int bcm_sf2_sw_probe(struct platform_device *pdev)
 {
        const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME;
-       struct bcm_sf2_priv *priv = ds_to_priv(ds);
-       struct device_node *dn;
+       struct device_node *dn = pdev->dev.of_node;
+       struct b53_platform_data *pdata;
+       struct bcm_sf2_priv *priv;
+       struct b53_device *dev;
+       struct dsa_switch *ds;
        void __iomem **base;
-       unsigned int port;
+       struct resource *r;
        unsigned int i;
        u32 reg, rev;
        int ret;
 
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv);
+       if (!dev)
+               return -ENOMEM;
+
+       pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
+
+       /* Auto-detection using standard registers will not work, so
+        * provide an indication of what kind of device we are, for
+        * b53_common to work with.
+        */
+       pdata->chip_id = BCM7445_DEVICE_ID;
+       dev->pdata = pdata;
+
+       priv->dev = dev;
+       ds = dev->ds;
+
+       /* Override the parts that are non-standard wrt. normal b53 devices */
+       ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol;
+       ds->ops->setup = bcm_sf2_sw_setup;
+       ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags;
+       ds->ops->adjust_link = bcm_sf2_sw_adjust_link;
+       ds->ops->fixed_link_update = bcm_sf2_sw_fixed_link_update;
+       ds->ops->suspend = bcm_sf2_sw_suspend;
+       ds->ops->resume = bcm_sf2_sw_resume;
+       ds->ops->get_wol = bcm_sf2_sw_get_wol;
+       ds->ops->set_wol = bcm_sf2_sw_set_wol;
+       ds->ops->port_enable = bcm_sf2_port_setup;
+       ds->ops->port_disable = bcm_sf2_port_disable;
+       ds->ops->get_eee = bcm_sf2_sw_get_eee;
+       ds->ops->set_eee = bcm_sf2_sw_set_eee;
+
+       /* Avoid having DSA free our slave MDIO bus (checking for
+        * ds->slave_mii_bus and ds->ops->phy_read being non-NULL)
+        */
+       ds->ops->phy_read = NULL;
+
+       dev_set_drvdata(&pdev->dev, priv);
+
        spin_lock_init(&priv->indir_lock);
        mutex_init(&priv->stats_mutex);
 
-       /* All the interesting properties are at the parent device_node
-        * level
-        */
-       dn = ds->cd->of_node->parent;
-       bcm_sf2_identify_ports(priv, ds->cd->of_node);
+       bcm_sf2_identify_ports(priv, dn->child);
 
        priv->irq0 = irq_of_parse_and_map(dn, 0);
        priv->irq1 = irq_of_parse_and_map(dn, 1);
 
        base = &priv->core;
        for (i = 0; i < BCM_SF2_REGS_NUM; i++) {
-               *base = of_iomap(dn, i);
-               if (*base == NULL) {
+               r = platform_get_resource(pdev, IORESOURCE_MEM, i);
+               *base = devm_ioremap_resource(&pdev->dev, r);
+               if (IS_ERR(*base)) {
                        pr_err("unable to find register: %s\n", reg_names[i]);
-                       ret = -ENOMEM;
-                       goto out_unmap;
+                       return PTR_ERR(*base);
                }
                base++;
        }
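
Switching from of_iomap() to devm_ioremap_resource() ties every mapping to the device lifetime, which is what lets the out_unmap error unwinding disappear further down in this patch. The general devm idiom, for reference (regs is a hypothetical local):

	/* devm-managed mapping: released automatically on probe failure
	 * or driver unbind, so no iounmap() in error paths.
	 */
	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	regs = devm_ioremap_resource(&pdev->dev, r);
	if (IS_ERR(regs))
		return PTR_ERR(regs);
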
@@ -1606,30 +1053,30 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
        ret = bcm_sf2_sw_rst(priv);
        if (ret) {
                pr_err("unable to software reset switch: %d\n", ret);
-               goto out_unmap;
+               return ret;
        }
 
        ret = bcm_sf2_mdio_register(ds);
        if (ret) {
                pr_err("failed to register MDIO bus\n");
-               goto out_unmap;
+               return ret;
        }
 
        /* Disable all interrupts and request them */
        bcm_sf2_intr_disable(priv);
 
-       ret = request_irq(priv->irq0, bcm_sf2_switch_0_isr, 0,
-                         "switch_0", priv);
+       ret = devm_request_irq(&pdev->dev, priv->irq0, bcm_sf2_switch_0_isr, 0,
+                              "switch_0", priv);
        if (ret < 0) {
                pr_err("failed to request switch_0 IRQ\n");
                goto out_mdio;
        }
 
-       ret = request_irq(priv->irq1, bcm_sf2_switch_1_isr, 0,
-                         "switch_1", priv);
+       ret = devm_request_irq(&pdev->dev, priv->irq1, bcm_sf2_switch_1_isr, 0,
+                              "switch_1", priv);
        if (ret < 0) {
                pr_err("failed to request switch_1 IRQ\n");
-               goto out_free_irq0;
+               goto out_mdio;
        }
 
        /* Reset the MIB counters */
@@ -1649,19 +1096,6 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
                                 &priv->hw_params.num_gphy))
                priv->hw_params.num_gphy = 1;
 
-       /* Enable all valid ports and disable those unused */
-       for (port = 0; port < priv->hw_params.num_ports; port++) {
-               /* IMP port receives special treatment */
-               if ((1 << port) & ds->enabled_port_mask)
-                       bcm_sf2_port_setup(ds, port, NULL);
-               else if (dsa_is_cpu_port(ds, port))
-                       bcm_sf2_imp_setup(ds, port);
-               else
-                       bcm_sf2_port_disable(ds, port, NULL);
-       }
-
-       bcm_sf2_sw_configure_vlan(ds);
-
        rev = reg_readl(priv, REG_SWITCH_REVISION);
        priv->hw_params.top_rev = (rev >> SWITCH_TOP_REV_SHIFT) &
                                        SWITCH_TOP_REV_MASK;
@@ -1670,6 +1104,10 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
        rev = reg_readl(priv, REG_PHY_REVISION);
        priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK;
 
+       ret = b53_switch_register(dev);
+       if (ret)
+               goto out_mdio;
+
        pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n",
                priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff,
                priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff,
@@ -1677,66 +1115,60 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
 
        return 0;
 
-out_free_irq0:
-       free_irq(priv->irq0, priv);
 out_mdio:
        bcm_sf2_mdio_unregister(priv);
-out_unmap:
-       base = &priv->core;
-       for (i = 0; i < BCM_SF2_REGS_NUM; i++) {
-               if (*base)
-                       iounmap(*base);
-               base++;
-       }
        return ret;
 }
 
-static struct dsa_switch_driver bcm_sf2_switch_driver = {
-       .tag_protocol           = DSA_TAG_PROTO_BRCM,
-       .probe                  = bcm_sf2_sw_drv_probe,
-       .setup                  = bcm_sf2_sw_setup,
-       .set_addr               = bcm_sf2_sw_set_addr,
-       .get_phy_flags          = bcm_sf2_sw_get_phy_flags,
-       .get_strings            = bcm_sf2_sw_get_strings,
-       .get_ethtool_stats      = bcm_sf2_sw_get_ethtool_stats,
-       .get_sset_count         = bcm_sf2_sw_get_sset_count,
-       .adjust_link            = bcm_sf2_sw_adjust_link,
-       .fixed_link_update      = bcm_sf2_sw_fixed_link_update,
-       .suspend                = bcm_sf2_sw_suspend,
-       .resume                 = bcm_sf2_sw_resume,
-       .get_wol                = bcm_sf2_sw_get_wol,
-       .set_wol                = bcm_sf2_sw_set_wol,
-       .port_enable            = bcm_sf2_port_setup,
-       .port_disable           = bcm_sf2_port_disable,
-       .get_eee                = bcm_sf2_sw_get_eee,
-       .set_eee                = bcm_sf2_sw_set_eee,
-       .port_bridge_join       = bcm_sf2_sw_br_join,
-       .port_bridge_leave      = bcm_sf2_sw_br_leave,
-       .port_stp_state_set     = bcm_sf2_sw_br_set_stp_state,
-       .port_fdb_prepare       = bcm_sf2_sw_fdb_prepare,
-       .port_fdb_add           = bcm_sf2_sw_fdb_add,
-       .port_fdb_del           = bcm_sf2_sw_fdb_del,
-       .port_fdb_dump          = bcm_sf2_sw_fdb_dump,
-       .port_vlan_filtering    = bcm_sf2_sw_vlan_filtering,
-       .port_vlan_prepare      = bcm_sf2_sw_vlan_prepare,
-       .port_vlan_add          = bcm_sf2_sw_vlan_add,
-       .port_vlan_del          = bcm_sf2_sw_vlan_del,
-       .port_vlan_dump         = bcm_sf2_sw_vlan_dump,
-};
-
-static int __init bcm_sf2_init(void)
+static int bcm_sf2_sw_remove(struct platform_device *pdev)
 {
-       register_switch_driver(&bcm_sf2_switch_driver);
+       struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+       /* Disable all ports and interrupts */
+       priv->wol_ports_mask = 0;
+       bcm_sf2_sw_suspend(priv->dev->ds);
+       dsa_unregister_switch(priv->dev->ds);
+       bcm_sf2_mdio_unregister(priv);
 
        return 0;
 }
-module_init(bcm_sf2_init);
 
-static void __exit bcm_sf2_exit(void)
+#ifdef CONFIG_PM_SLEEP
+static int bcm_sf2_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+       return dsa_switch_suspend(priv->dev->ds);
+}
+
+static int bcm_sf2_resume(struct device *dev)
 {
-       unregister_switch_driver(&bcm_sf2_switch_driver);
+       struct platform_device *pdev = to_platform_device(dev);
+       struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+       return dsa_switch_resume(priv->dev->ds);
 }
-module_exit(bcm_sf2_exit);
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops,
+                        bcm_sf2_suspend, bcm_sf2_resume);
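
SIMPLE_DEV_PM_OPS keeps the #ifdef noise out of the driver struct: it always defines the dev_pm_ops object, but only wires in the system-sleep callbacks when CONFIG_PM_SLEEP is set, which is why only the callbacks themselves are guarded above. Approximately:

	/* Rough, illustrative equivalent of
	 * SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops, bcm_sf2_suspend, bcm_sf2_resume):
	 * the sleep hooks are only populated under CONFIG_PM_SLEEP.
	 */
	static const struct dev_pm_ops bcm_sf2_pm_ops_sketch = {
		.suspend = bcm_sf2_suspend,
		.resume  = bcm_sf2_resume,
	};
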
+
+static const struct of_device_id bcm_sf2_of_match[] = {
+       { .compatible = "brcm,bcm7445-switch-v4.0" },
+       { /* sentinel */ },
+};
+
+static struct platform_driver bcm_sf2_driver = {
+       .probe  = bcm_sf2_sw_probe,
+       .remove = bcm_sf2_sw_remove,
+       .driver = {
+               .name = "brcm-sf2",
+               .of_match_table = bcm_sf2_of_match,
+               .pm = &bcm_sf2_pm_ops,
+       },
+};
+module_platform_driver(bcm_sf2_driver);
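
module_platform_driver() replaces the hand-rolled init/exit pair deleted above; it expands to the equivalent of:

	static int __init bcm_sf2_driver_init(void)
	{
		return platform_driver_register(&bcm_sf2_driver);
	}
	module_init(bcm_sf2_driver_init);

	static void __exit bcm_sf2_driver_exit(void)
	{
		platform_driver_unregister(&bcm_sf2_driver);
	}
	module_exit(bcm_sf2_driver_exit);
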
 
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Driver for Broadcom Starfighter 2 ethernet switch chip");
index 463bed8..4469267 100644
@@ -26,6 +26,7 @@
 #include <net/dsa.h>
 
 #include "bcm_sf2_regs.h"
+#include "b53/b53_priv.h"
 
 struct bcm_sf2_hw_params {
        u16     top_rev;
@@ -49,72 +50,8 @@ struct bcm_sf2_port_status {
        unsigned int link;
 
        struct ethtool_eee eee;
-
-       u32 vlan_ctl_mask;
-       u16 pvid;
-
-       struct net_device *bridge_dev;
-};
-
-struct bcm_sf2_arl_entry {
-       u8 port;
-       u8 mac[ETH_ALEN];
-       u16 vid;
-       u8 is_valid:1;
-       u8 is_age:1;
-       u8 is_static:1;
 };
 
-struct bcm_sf2_vlan {
-       u16 members;
-       u16 untag;
-};
-
-static inline void bcm_sf2_mac_from_u64(u64 src, u8 *dst)
-{
-       unsigned int i;
-
-       for (i = 0; i < ETH_ALEN; i++)
-               dst[ETH_ALEN - 1 - i] = (src >> (8 * i)) & 0xff;
-}
-
-static inline u64 bcm_sf2_mac_to_u64(const u8 *src)
-{
-       unsigned int i;
-       u64 dst = 0;
-
-       for (i = 0; i < ETH_ALEN; i++)
-               dst |= (u64)src[ETH_ALEN - 1 - i] << (8 * i);
-
-       return dst;
-}
-
-static inline void bcm_sf2_arl_to_entry(struct bcm_sf2_arl_entry *ent,
-                                       u64 mac_vid, u32 fwd_entry)
-{
-       memset(ent, 0, sizeof(*ent));
-       ent->port = fwd_entry & PORTID_MASK;
-       ent->is_valid = !!(fwd_entry & ARL_VALID);
-       ent->is_age = !!(fwd_entry & ARL_AGE);
-       ent->is_static = !!(fwd_entry & ARL_STATIC);
-       bcm_sf2_mac_from_u64(mac_vid, ent->mac);
-       ent->vid = mac_vid >> VID_SHIFT;
-}
-
-static inline void bcm_sf2_arl_from_entry(u64 *mac_vid, u32 *fwd_entry,
-                                         const struct bcm_sf2_arl_entry *ent)
-{
-       *mac_vid = bcm_sf2_mac_to_u64(ent->mac);
-       *mac_vid |= (u64)(ent->vid & VID_MASK) << VID_SHIFT;
-       *fwd_entry = ent->port & PORTID_MASK;
-       if (ent->is_valid)
-               *fwd_entry |= ARL_VALID;
-       if (ent->is_static)
-               *fwd_entry |= ARL_STATIC;
-       if (ent->is_age)
-               *fwd_entry |= ARL_AGE;
-}
-
 struct bcm_sf2_priv {
        /* Base registers, keep those in order with BCM_SF2_REGS_NAME */
        void __iomem                    *core;
@@ -134,6 +71,9 @@ struct bcm_sf2_priv {
        u32                             irq1_stat;
        u32                             irq1_mask;
 
+       /* Backing b53_device */
+       struct b53_device               *dev;
+
        /* Mutex protecting access to the MIB counters */
        struct mutex                    stats_mutex;
 
@@ -155,16 +95,14 @@ struct bcm_sf2_priv {
        struct device_node              *master_mii_dn;
        struct mii_bus                  *slave_mii_bus;
        struct mii_bus                  *master_mii_bus;
-
-       /* Cache of programmed VLANs */
-       struct bcm_sf2_vlan             vlans[VLAN_N_VID];
 };
 
-struct bcm_sf2_hw_stats {
-       const char      *string;
-       u16             reg;
-       u8              sizeof_stat;
-};
+static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds)
+{
+       struct b53_device *dev = ds->priv;
+
+       return dev->priv;
+}
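
The double dereference exists because the b53 core now owns ds->priv: b53_switch_alloc() stores the b53_device there, and bcm_sf2_sw_probe() passes the SF2 state in as that device's own priv. Sketched out:

	/* Pointer chain after bcm_sf2_sw_probe() (illustrative):
	 *
	 *   ds->priv        -> struct b53_device  (allocated by b53)
	 *   b53_device.priv -> struct bcm_sf2_priv (SF2 glue state)
	 *
	 * so SF2-specific dsa_switch_ops recover their state like this:
	 */
	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
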
 
 #define SF2_IO_MACRO(name) \
 static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off)     \
@@ -205,8 +143,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,        \
 static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
                                                u32 mask)               \
 {                                                                      \
-       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
        priv->irq##which##_mask &= ~(mask);                             \
+       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
 }                                                                      \
 static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
                                                u32 mask)               \
index 9f2a9cb..838fe37 100644
 #define  RX_BCST_EN                    (1 << 2)
 #define  RX_MCST_EN                    (1 << 3)
 #define  RX_UCST_EN                    (1 << 4)
-#define  G_MISTP_STATE_SHIFT           5
-#define  G_MISTP_NO_STP                        (0 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_DIS_STATE             (1 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_BLOCK_STATE           (2 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_LISTEN_STATE          (3 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_LEARN_STATE           (4 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_FWD_STATE             (5 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_STATE_MASK            0x7
 
 #define CORE_SWMODE                    0x0002c
 #define  SW_FWDG_MODE                  (1 << 0)
 #define  BRCM_HDR_EN_P5                        (1 << 1)
 #define  BRCM_HDR_EN_P7                        (1 << 2)
 
-#define CORE_BRCM_HDR_CTRL2            0x0828
-
-#define CORE_HL_PRTC_CTRL              0x0940
-#define  ARP_EN                                (1 << 0)
-#define  RARP_EN                       (1 << 1)
-#define  DHCP_EN                       (1 << 2)
-#define  ICMPV4_EN                     (1 << 3)
-#define  ICMPV6_EN                     (1 << 4)
-#define  ICMPV6_FWD_MODE               (1 << 5)
-#define  IGMP_DIP_EN                   (1 << 8)
-#define  IGMP_RPTLVE_EN                        (1 << 9)
-#define  IGMP_RTPLVE_FWD_MODE          (1 << 10)
-#define  IGMP_QRY_EN                   (1 << 11)
-#define  IGMP_QRY_FWD_MODE             (1 << 12)
-#define  IGMP_UKN_EN                   (1 << 13)
-#define  IGMP_UKN_FWD_MODE             (1 << 14)
-#define  MLD_RPTDONE_EN                        (1 << 15)
-#define  MLD_RPTDONE_FWD_MODE          (1 << 16)
-#define  MLD_QRY_EN                    (1 << 17)
-#define  MLD_QRY_FWD_MODE              (1 << 18)
-
 #define CORE_RST_MIB_CNT_EN            0x0950
 
 #define CORE_BRCM_HDR_RX_DIS           0x0980
 #define CORE_BRCM_HDR_TX_DIS           0x0988
 
-#define CORE_ARLA_NUM_ENTRIES          1024
-
-#define CORE_ARLA_RWCTL                        0x1400
-#define  ARL_RW                                (1 << 0)
-#define  IVL_SVL_SELECT                        (1 << 6)
-#define  ARL_STRTDN                    (1 << 7)
-
-#define CORE_ARLA_MAC                  0x1408
-#define CORE_ARLA_VID                  0x1420
-#define  ARLA_VIDTAB_INDX_MASK         0x1fff
-
-#define CORE_ARLA_MACVID0              0x1440
-#define  MAC_MASK                      0xffffffffff
-#define  VID_SHIFT                     48
-#define  VID_MASK                      0xfff
-
-#define CORE_ARLA_FWD_ENTRY0           0x1460
-#define  PORTID_MASK                   0x1ff
-#define  ARL_CON_SHIFT                 9
-#define  ARL_CON_MASK                  0x3
-#define  ARL_PRI_SHIFT                 11
-#define  ARL_PRI_MASK                  0x7
-#define  ARL_AGE                       (1 << 14)
-#define  ARL_STATIC                    (1 << 15)
-#define  ARL_VALID                     (1 << 16)
-
-#define CORE_ARLA_MACVID_ENTRY(x)      (CORE_ARLA_MACVID0 + ((x) * 0x40))
-#define CORE_ARLA_FWD_ENTRY(x)         (CORE_ARLA_FWD_ENTRY0 + ((x) * 0x40))
-
-#define CORE_ARLA_SRCH_CTL             0x1540
-#define  ARLA_SRCH_VLID                        (1 << 0)
-#define  IVL_SVL_SELECT                        (1 << 6)
-#define  ARLA_SRCH_STDN                        (1 << 7)
-
-#define CORE_ARLA_SRCH_ADR             0x1544
-#define  ARLA_SRCH_ADR_VALID           (1 << 15)
-
-#define CORE_ARLA_SRCH_RSLT_0_MACVID   0x1580
-#define CORE_ARLA_SRCH_RSLT_0          0x15a0
-
-#define CORE_ARLA_SRCH_RSLT_MACVID(x)  (CORE_ARLA_SRCH_RSLT_0_MACVID + ((x) * 0x40))
-#define CORE_ARLA_SRCH_RSLT(x)         (CORE_ARLA_SRCH_RSLT_0 + ((x) * 0x40))
-
 #define CORE_ARLA_VTBL_RWCTRL          0x1600
 #define  ARLA_VTBL_CMD_WRITE           0
 #define  ARLA_VTBL_CMD_READ            1
 #define  P_TXQ_PSM_VDD(x)              (P_TXQ_PSM_VDD_MASK << \
                                        ((x) * P_TXQ_PSM_VDD_SHIFT))
 
-#define        CORE_P0_MIB_OFFSET              0x8000
-#define P_MIB_SIZE                     0x400
-#define CORE_P_MIB_OFFSET(x)           (CORE_P0_MIB_OFFSET + (x) * P_MIB_SIZE)
-
 #define CORE_PORT_VLAN_CTL_PORT(x)     (0xc400 + ((x) * 0x8))
 #define  PORT_VLAN_CTRL_MASK           0x1ff
 
-#define CORE_VLAN_CTRL0                        0xd000
-#define  CHANGE_1P_VID_INNER           (1 << 0)
-#define  CHANGE_1P_VID_OUTER           (1 << 1)
-#define  CHANGE_1Q_VID                 (1 << 3)
-#define  VLAN_LEARN_MODE_SVL           (0 << 5)
-#define  VLAN_LEARN_MODE_IVL           (3 << 5)
-#define  VLAN_EN                       (1 << 7)
-
-#define CORE_VLAN_CTRL1                        0xd004
-#define  EN_RSV_MCAST_FWDMAP           (1 << 2)
-#define  EN_RSV_MCAST_UNTAG            (1 << 3)
-#define  EN_IPMC_BYPASS_FWDMAP         (1 << 5)
-#define  EN_IPMC_BYPASS_UNTAG          (1 << 6)
-
-#define CORE_VLAN_CTRL2                        0xd008
-#define  EN_MIIM_BYPASS_V_FWDMAP       (1 << 2)
-#define  EN_GMRP_GVRP_V_FWDMAP         (1 << 5)
-#define  EN_GMRP_GVRP_UNTAG_MAP                (1 << 6)
-
-#define CORE_VLAN_CTRL3                        0xd00c
-#define  EN_DROP_NON1Q_MASK            0x1ff
-
-#define CORE_VLAN_CTRL4                        0xd014
-#define  RESV_MCAST_FLOOD              (1 << 1)
-#define  EN_DOUBLE_TAG_MASK            0x3
-#define  EN_DOUBLE_TAG_SHIFT           2
-#define  EN_MGE_REV_GMRP               (1 << 4)
-#define  EN_MGE_REV_GVRP               (1 << 5)
-#define  INGR_VID_CHK_SHIFT            6
-#define  INGR_VID_CHK_MASK             0x3
-#define  INGR_VID_CHK_FWD              (0 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_DROP             (1 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_NO_CHK           (2 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_VID_VIOL_IMP     (3 << INGR_VID_CHK_SHIFT)
-
-#define CORE_VLAN_CTRL5                        0xd018
-#define  EN_CPU_RX_BYP_INNER_CRCCHCK   (1 << 0)
-#define  EN_VID_FFF_FWD                        (1 << 2)
-#define  DROP_VTABLE_MISS              (1 << 3)
-#define  EGRESS_DIR_FRM_BYP_TRUNK_EN   (1 << 4)
-#define  PRESV_NON1Q                   (1 << 6)
-
-#define CORE_VLAN_CTRL6                        0xd01c
-#define  STRICT_SFD_DETECT             (1 << 0)
-#define  DIS_ARL_BUST_LMIT             (1 << 4)
-
 #define CORE_DEFAULT_1Q_TAG_P(x)       (0xd040 + ((x) * 8))
 #define  CFI_SHIFT                     12
 #define  PRI_SHIFT                     13
index e36b408..7ce36db 100644
@@ -19,7 +19,7 @@
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-       struct mv88e6060_priv *priv = ds_to_priv(ds);
+       struct mv88e6060_priv *priv = ds->priv;
 
        return mdiobus_read_nested(priv->bus, priv->sw_addr + addr, reg);
 }
@@ -37,7 +37,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg)
 
 static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-       struct mv88e6060_priv *priv = ds_to_priv(ds);
+       struct mv88e6060_priv *priv = ds->priv;
 
        return mdiobus_write_nested(priv->bus, priv->sw_addr + addr, reg, val);
 }
@@ -69,6 +69,11 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr)
        return NULL;
 }
 
+static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds)
+{
+       return DSA_TAG_PROTO_TRAILER;
+}
+
 static const char *mv88e6060_drv_probe(struct device *dsa_dev,
                                       struct device *host_dev, int sw_addr,
                                       void **_priv)
@@ -247,8 +252,8 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
        return reg_write(ds, addr, regnum, val);
 }
 
-static struct dsa_switch_driver mv88e6060_switch_driver = {
-       .tag_protocol   = DSA_TAG_PROTO_TRAILER,
+static struct dsa_switch_ops mv88e6060_switch_ops = {
+       .get_tag_protocol = mv88e6060_get_tag_protocol,
        .probe          = mv88e6060_drv_probe,
        .setup          = mv88e6060_setup,
        .set_addr       = mv88e6060_set_addr,
@@ -258,14 +263,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = {
 
 static int __init mv88e6060_init(void)
 {
-       register_switch_driver(&mv88e6060_switch_driver);
+       register_switch_driver(&mv88e6060_switch_ops);
        return 0;
 }
 module_init(mv88e6060_init);
 
 static void __exit mv88e6060_cleanup(void)
 {
-       unregister_switch_driver(&mv88e6060_switch_driver);
+       unregister_switch_driver(&mv88e6060_switch_ops);
 }
 module_exit(mv88e6060_cleanup);
 
index 490bc06..4866688 100644
@@ -2,6 +2,18 @@ config NET_DSA_MV88E6XXX
        tristate "Marvell 88E6xxx Ethernet switch fabric support"
        depends on NET_DSA
        select NET_DSA_TAG_EDSA
+       select NET_DSA_TAG_DSA
        help
          This driver adds support for most of the Marvell 88E6xxx models of
          Ethernet switch chips, except 88E6060.
+
+config NET_DSA_MV88E6XXX_GLOBAL2
+       bool "Switch Global 2 Registers support"
+       default y
+       depends on NET_DSA_MV88E6XXX
+       help
+         This register set, at internal SMI address 0x1C, provides extended
+         features like an EEPROM interface, trunking, cross-chip setup, etc.
+
+         It is required on most chips. If the chip you are compiling support
+         for doesn't have such a register set, say N here. If in doubt, say Y.
index 6e29a75..6971039 100644
@@ -1 +1,3 @@
-obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o
+obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
+mv88e6xxx-objs := chip.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
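
With this split, global2.o is only linked when the new Kconfig symbol is enabled, so chip.c must still compile without it. The usual companion pattern (assumed here; global2.h itself is not part of this excerpt, and the function name is a placeholder) is static-inline stubs, along these lines:

#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);	/* in global2.c */
#else
static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
{
	return -EOPNOTSUPP;	/* chip lacks Global 2 support */
}
#endif
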
index a230fcb..70a812d 100644
@@ -29,7 +29,9 @@
 #include <linux/phy.h>
 #include <net/dsa.h>
 #include <net/switchdev.h>
+
 #include "mv88e6xxx.h"
+#include "global2.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -182,8 +184,7 @@ static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = {
        .write = mv88e6xxx_smi_multi_chip_write,
 };
 
-static int mv88e6xxx_read(struct mv88e6xxx_chip *chip,
-                         int addr, int reg, u16 *val)
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val)
 {
        int err;
 
@@ -199,8 +200,7 @@ static int mv88e6xxx_read(struct mv88e6xxx_chip *chip,
        return 0;
 }
 
-static int mv88e6xxx_write(struct mv88e6xxx_chip *chip,
-                          int addr, int reg, u16 val)
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
 {
        int err;
 
@@ -306,12 +306,11 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
                                        reg, val);
 }
 
-static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
-                         u16 mask)
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
 {
-       unsigned long timeout = jiffies + HZ / 10;
+       int i;
 
-       while (time_before(jiffies, timeout)) {
+       for (i = 0; i < 16; i++) {
                u16 val;
                int err;
 
@@ -325,28 +324,20 @@ static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg,
                usleep_range(1000, 2000);
        }
 
+       dev_err(chip->dev, "Timeout while waiting for switch\n");
        return -ETIMEDOUT;
 }
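
The wait helper now polls a fixed 16 times with a 1-2 ms sleep per iteration (roughly 16-32 ms worst case) instead of racing a jiffies deadline, so the timeout no longer scales with HZ, and it is un-static'd for use by the split-out code. Typical usage, as in mv88e6xxx_update() below:

	/* Poll until BIT(15) (the busy/update bit) clears, or time out */
	err = mv88e6xxx_wait(chip, addr, reg, BIT(15));
	if (err)
		return err;	/* -ETIMEDOUT, already logged by the helper */
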
 
 /* Indirect write to single pointer-data register with an Update bit */
-static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
-                           u16 update)
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update)
 {
        u16 val;
-       int i, err;
+       int err;
 
        /* Wait until the previous operation is completed */
-       for (i = 0; i < 16; ++i) {
-               err = mv88e6xxx_read(chip, addr, reg, &val);
-               if (err)
-                       return err;
-
-               if (!(val & BIT(15)))
-                       break;
-       }
-
-       if (i == 16)
-               return -ETIMEDOUT;
+       err = mv88e6xxx_wait(chip, addr, reg, BIT(15));
+       if (err)
+               return err;
 
        /* Set the Update bit to trigger a write operation */
        val = BIT(15) | update;
@@ -375,7 +366,7 @@ static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr,
 static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
 {
        int ret;
-       unsigned long timeout;
+       int i;
 
        ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL);
        if (ret < 0)
@@ -386,8 +377,7 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
        if (ret)
                return ret;
 
-       timeout = jiffies + 1 * HZ;
-       while (time_before(jiffies, timeout)) {
+       for (i = 0; i < 16; i++) {
                ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS);
                if (ret < 0)
                        return ret;
@@ -403,8 +393,7 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
 
 static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip)
 {
-       int ret, err;
-       unsigned long timeout;
+       int ret, err, i;
 
        ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL);
        if (ret < 0)
@@ -415,8 +404,7 @@ static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip)
        if (err)
                return err;
 
-       timeout = jiffies + 1 * HZ;
-       while (time_before(jiffies, timeout)) {
+       for (i = 0; i < 16; i++) {
                ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS);
                if (ret < 0)
                        return ret;
@@ -496,6 +484,11 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip)
        chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
 }
 
+static void mv88e6xxx_ppu_state_destroy(struct mv88e6xxx_chip *chip)
+{
+       del_timer_sync(&chip->ppu_timer);
+}
+
 static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr,
                                  int reg, u16 *val)
 {
@@ -591,7 +584,7 @@ static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip)
 static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
                                  struct phy_device *phydev)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u32 reg;
        int ret;
 
@@ -837,7 +830,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
                                  uint8_t *data)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
 
@@ -853,7 +846,7 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 
 static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
 
@@ -868,7 +861,7 @@ static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
                                        uint64_t *data)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_hw_stat *stat;
        int ret;
        int i, j;
@@ -899,7 +892,7 @@ static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
 static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
                               struct ethtool_regs *regs, void *_p)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 *p = _p;
        int i;
 
@@ -929,7 +922,7 @@ static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip)
 static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
                             struct ethtool_eee *e)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 reg;
        int err;
 
@@ -959,7 +952,7 @@ out:
 static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
                             struct phy_device *phydev, struct ethtool_eee *e)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 reg;
        int err;
 
@@ -1190,7 +1183,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
 static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
                                         u8 state)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int stp_state;
        int err;
 
@@ -1439,7 +1432,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
                                    struct switchdev_obj_port_vlan *vlan,
                                    int (*cb)(struct switchdev_obj *obj))
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_vtu_stu_entry next;
        u16 pvid;
        int err;
@@ -1808,7 +1801,7 @@ static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
 static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
                                        u16 vid_begin, u16 vid_end)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_vtu_stu_entry vlan;
        int i, err;
 
@@ -1869,7 +1862,7 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = {
 static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
                                         bool vlan_filtering)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
                PORT_CONTROL_2_8021Q_DISABLED;
        int ret;
@@ -1911,7 +1904,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
                            const struct switchdev_obj_port_vlan *vlan,
                            struct switchdev_trans *trans)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
@@ -1952,7 +1945,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
                                    const struct switchdev_obj_port_vlan *vlan,
                                    struct switchdev_trans *trans)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
        bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
        u16 vid;
@@ -2014,7 +2007,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip,
 static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
                                   const struct switchdev_obj_port_vlan *vlan)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 pvid, vid;
        int err = 0;
 
@@ -2098,9 +2091,9 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip,
        return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB);
 }
 
-static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_chip *chip, int port,
-                                   const unsigned char *addr, u16 vid,
-                                   u8 state)
+static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+                                       const unsigned char *addr, u16 vid,
+                                       u8 state)
 {
        struct mv88e6xxx_atu_entry entry = { 0 };
        struct mv88e6xxx_vtu_stu_entry vlan;
@@ -2139,30 +2132,27 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
                                   const struct switchdev_obj_port_fdb *fdb,
                                   struct switchdev_trans *trans)
 {
-       int state = is_multicast_ether_addr(fdb->addr) ?
-               GLOBAL_ATU_DATA_STATE_MC_STATIC :
-               GLOBAL_ATU_DATA_STATE_UC_STATIC;
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
 
        mutex_lock(&chip->reg_lock);
-       if (_mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, state))
-               netdev_err(ds->ports[port].netdev,
-                          "failed to load MAC address\n");
+       if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+                                        GLOBAL_ATU_DATA_STATE_UC_STATIC))
+               netdev_err(ds->ports[port].netdev, "failed to load unicast MAC address\n");
        mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
                                  const struct switchdev_obj_port_fdb *fdb)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-       int ret;
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
 
        mutex_lock(&chip->reg_lock);
-       ret = _mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid,
-                                      GLOBAL_ATU_DATA_STATE_UNUSED);
+       err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+                                          GLOBAL_ATU_DATA_STATE_UNUSED);
        mutex_unlock(&chip->reg_lock);
 
-       return ret;
+       return err;
 }
 
 static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
@@ -2210,10 +2200,10 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
        return 0;
 }
 
-static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip,
-                                       u16 fid, u16 vid, int port,
-                                       struct switchdev_obj_port_fdb *fdb,
-                                       int (*cb)(struct switchdev_obj *obj))
+static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
+                                     u16 fid, u16 vid, int port,
+                                     struct switchdev_obj *obj,
+                                     int (*cb)(struct switchdev_obj *obj))
 {
        struct mv88e6xxx_atu_entry addr = {
                .mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
@@ -2227,72 +2217,98 @@ static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip,
        do {
                err = _mv88e6xxx_atu_getnext(chip, fid, &addr);
                if (err)
-                       break;
+                       return err;
 
                if (addr.state == GLOBAL_ATU_DATA_STATE_UNUSED)
                        break;
 
-               if (!addr.trunk && addr.portv_trunkid & BIT(port)) {
-                       bool is_static = addr.state ==
-                               (is_multicast_ether_addr(addr.mac) ?
-                                GLOBAL_ATU_DATA_STATE_MC_STATIC :
-                                GLOBAL_ATU_DATA_STATE_UC_STATIC);
+               if (addr.trunk || (addr.portv_trunkid & BIT(port)) == 0)
+                       continue;
+
+               if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) {
+                       struct switchdev_obj_port_fdb *fdb;
 
+                       if (!is_unicast_ether_addr(addr.mac))
+                               continue;
+
+                       fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
                        fdb->vid = vid;
                        ether_addr_copy(fdb->addr, addr.mac);
-                       fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+                       if (addr.state == GLOBAL_ATU_DATA_STATE_UC_STATIC)
+                               fdb->ndm_state = NUD_NOARP;
+                       else
+                               fdb->ndm_state = NUD_REACHABLE;
+               } else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) {
+                       struct switchdev_obj_port_mdb *mdb;
 
-                       err = cb(&fdb->obj);
-                       if (err)
-                               break;
+                       if (!is_multicast_ether_addr(addr.mac))
+                               continue;
+
+                       mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+                       mdb->vid = vid;
+                       ether_addr_copy(mdb->addr, addr.mac);
+               } else {
+                       return -EOPNOTSUPP;
                }
+
+               err = cb(obj);
+               if (err)
+                       return err;
        } while (!is_broadcast_ether_addr(addr.mac));
 
        return err;
 }
 
-static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
-                                  struct switchdev_obj_port_fdb *fdb,
-                                  int (*cb)(struct switchdev_obj *obj))
+static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
+                                 struct switchdev_obj *obj,
+                                 int (*cb)(struct switchdev_obj *obj))
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
        struct mv88e6xxx_vtu_stu_entry vlan = {
                .vid = GLOBAL_VTU_VID_MASK, /* all ones */
        };
        u16 fid;
        int err;
 
-       mutex_lock(&chip->reg_lock);
-
        /* Dump port's default Filtering Information Database (VLAN ID 0) */
        err = _mv88e6xxx_port_fid_get(chip, port, &fid);
        if (err)
-               goto unlock;
+               return err;
 
-       err = _mv88e6xxx_port_fdb_dump_one(chip, fid, 0, port, fdb, cb);
+       err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb);
        if (err)
-               goto unlock;
+               return err;
 
        /* Dump VLANs' Filtering Information Databases */
        err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid);
        if (err)
-               goto unlock;
+               return err;
 
        do {
                err = _mv88e6xxx_vtu_getnext(chip, &vlan);
                if (err)
-                       break;
+                       return err;
 
                if (!vlan.valid)
                        break;
 
-               err = _mv88e6xxx_port_fdb_dump_one(chip, vlan.fid, vlan.vid,
-                                                  port, fdb, cb);
+               err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
+                                                obj, cb);
                if (err)
-                       break;
+                       return err;
        } while (vlan.vid < GLOBAL_VTU_VID_MASK);
 
-unlock:
+       return err;
+}
+
+static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
+                                  struct switchdev_obj_port_fdb *fdb,
+                                  int (*cb)(struct switchdev_obj *obj))
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb);
        mutex_unlock(&chip->reg_lock);
 
        return err;
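
The merged dump path relies on switchdev's container pattern: a generic
struct switchdev_obj is embedded in each specific object, the helper
dispatches on obj->id, and the SWITCHDEV_OBJ_PORT_FDB()/_MDB() wrappers
recover the outer structure via container_of(). A reduced sketch with toy
types:

    #include <linux/kernel.h>       /* container_of() */
    #include <linux/errno.h>
    #include <linux/types.h>

    struct toy_obj { int id; };
    struct toy_fdb { struct toy_obj obj; u16 vid; };

    #define TOY_ID_FDB      1
    #define TOY_FDB(o)      container_of(o, struct toy_fdb, obj)

    /* Fill one entry into whichever outer object the caller passed. */
    static int toy_fill(struct toy_obj *obj, u16 vid)
    {
            if (obj->id == TOY_ID_FDB) {
                    TOY_FDB(obj)->vid = vid;
                    return 0;
            }
            return -EOPNOTSUPP;     /* unknown object type */
    }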
@@ -2301,7 +2317,7 @@ unlock:
 static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
                                      struct net_device *bridge)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int i, err = 0;
 
        mutex_lock(&chip->reg_lock);
@@ -2324,7 +2340,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
 
 static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        struct net_device *bridge = chip->ports[port].bridge_dev;
        int i;
 
@@ -2493,28 +2509,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
                PORT_CONTROL_STATE_FORWARDING;
        if (dsa_is_cpu_port(ds, port)) {
-               if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip))
-                       reg |= PORT_CONTROL_DSA_TAG;
-               if (mv88e6xxx_6352_family(chip) ||
-                   mv88e6xxx_6351_family(chip) ||
-                   mv88e6xxx_6165_family(chip) ||
-                   mv88e6xxx_6097_family(chip) ||
-                   mv88e6xxx_6320_family(chip)) {
+               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA))
                        reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA |
-                               PORT_CONTROL_FORWARD_UNKNOWN |
                                PORT_CONTROL_FORWARD_UNKNOWN_MC;
-               }
-
-               if (mv88e6xxx_6352_family(chip) ||
-                   mv88e6xxx_6351_family(chip) ||
-                   mv88e6xxx_6165_family(chip) ||
-                   mv88e6xxx_6097_family(chip) ||
-                   mv88e6xxx_6095_family(chip) ||
-                   mv88e6xxx_6065_family(chip) ||
-                   mv88e6xxx_6185_family(chip) ||
-                   mv88e6xxx_6320_family(chip)) {
-                       reg |= PORT_CONTROL_EGRESS_ADD_TAG;
-               }
+               else
+                       reg |= PORT_CONTROL_DSA_TAG;
+               reg |= PORT_CONTROL_EGRESS_ADD_TAG |
+                       PORT_CONTROL_FORWARD_UNKNOWN;
        }
        if (dsa_is_dsa_port(ds, port)) {
                if (mv88e6xxx_6095_family(chip) ||
@@ -2642,10 +2643,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                /* Port Ethertype: use the Ethertype DSA Ethertype
                 * value.
                 */
-               ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-                                          PORT_ETH_TYPE, ETH_P_EDSA);
-               if (ret)
-                       return ret;
+               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) {
+                       ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
+                                                  PORT_ETH_TYPE, ETH_P_EDSA);
+                       if (ret)
+                               return ret;
+               }
+
                /* Tag Remap: use an identity 802.1p prio -> switch
                 * prio mapping.
                 */
@@ -2663,15 +2667,19 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
                        return ret;
        }
 
+       /* Rate Control: disable ingress rate limiting. */
        if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
            mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
-           mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) ||
            mv88e6xxx_6320_family(chip)) {
-               /* Rate Control: disable ingress rate limiting. */
                ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
                                           PORT_RATE_CONTROL, 0x0001);
                if (ret)
                        return ret;
+       } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) {
+               ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
+                                          PORT_RATE_CONTROL, 0x0000);
+               if (ret)
+                       return ret;
        }
 
        /* Port Control 1: disable trunking, disable sending
@@ -2753,7 +2761,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip,
 static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
                                     unsigned int ageing_time)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        mutex_lock(&chip->reg_lock);
@@ -2868,333 +2876,9 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
        return 0;
 }
 
-static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
-                                            int target, int port)
-{
-       u16 val = (target << 8) | (port & 0xf);
-
-       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
-{
-       int target, port;
-       int err;
-
-       /* Initialize the routing port to the 32 possible target devices */
-       for (target = 0; target < 32; ++target) {
-               port = 0xf;
-
-               if (target < DSA_MAX_SWITCHES) {
-                       port = chip->ds->rtable[target];
-                       if (port == DSA_RTABLE_NONE)
-                               port = 0xf;
-               }
-
-               err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
-static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
-                                        bool hask, u16 mask)
-{
-       const u16 port_mask = BIT(chip->info->num_ports) - 1;
-       u16 val = (num << 12) | (mask & port_mask);
-
-       if (hask)
-               val |= GLOBAL2_TRUNK_MASK_HASK;
-
-       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val);
-}
-
-static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
-                                           u16 map)
-{
-       const u16 port_mask = BIT(chip->info->num_ports) - 1;
-       u16 val = (id << 11) | (map & port_mask);
-
-       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
-{
-       const u16 port_mask = BIT(chip->info->num_ports) - 1;
-       int i, err;
-
-       /* Clear all eight possible Trunk Mask vectors */
-       for (i = 0; i < 8; ++i) {
-               err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
-               if (err)
-                       return err;
-       }
-
-       /* Clear all sixteen possible Trunk ID routing vectors */
-       for (i = 0; i < 16; ++i) {
-               err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
-{
-       int port, err;
-
-       /* Init all Ingress Rate Limit resources of all ports */
-       for (port = 0; port < chip->info->num_ports; ++port) {
-               /* XXX newer chips (like 88E6390) have different 2-bit ops */
-               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-                                     GLOBAL2_IRL_CMD_OP_INIT_ALL |
-                                     (port << 8));
-               if (err)
-                       break;
-
-               /* Wait for the operation to complete */
-               err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-                                    GLOBAL2_IRL_CMD_BUSY);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
-/* Indirect write to the Switch MAC/WoL/WoF register */
-static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
-                                        unsigned int pointer, u8 data)
-{
-       u16 val = (pointer << 8) | data;
-
-       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val);
-}
-
-static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
-{
-       int i, err;
-
-       for (i = 0; i < 6; i++) {
-               err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
-static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
-                                 u8 data)
-{
-       u16 val = (pointer << 8) | (data & 0x7);
-
-       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val);
-}
-
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
-{
-       int i, err;
-
-       /* Clear all sixteen possible Priority Override entries */
-       for (i = 0; i < 16; i++) {
-               err = mv88e6xxx_g2_pot_write(chip, i, 0);
-               if (err)
-                       break;
-       }
-
-       return err;
-}
-
-static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
-{
-       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
-                             GLOBAL2_EEPROM_CMD_BUSY |
-                             GLOBAL2_EEPROM_CMD_RUNNING);
-}
-
-static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
-{
-       int err;
-
-       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd);
-       if (err)
-               return err;
-
-       return mv88e6xxx_g2_eeprom_wait(chip);
-}
-
-static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
-                                     u8 addr, u16 *data)
-{
-       u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
-       int err;
-
-       err = mv88e6xxx_g2_eeprom_wait(chip);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
-       if (err)
-               return err;
-
-       return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
-}
-
-static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
-                                      u8 addr, u16 data)
-{
-       u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
-       int err;
-
-       err = mv88e6xxx_g2_eeprom_wait(chip);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
-       if (err)
-               return err;
-
-       return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
-}
-
-static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
-{
-       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
-                             GLOBAL2_SMI_PHY_CMD_BUSY);
-}
-
-static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
-{
-       int err;
-
-       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
-       if (err)
-               return err;
-
-       return mv88e6xxx_g2_smi_phy_wait(chip);
-}
-
-static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr,
-                                    int reg, u16 *val)
-{
-       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
-       int err;
-
-       err = mv88e6xxx_g2_smi_phy_wait(chip);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
-       if (err)
-               return err;
-
-       return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
-}
-
-static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr,
-                                     int reg, u16 val)
-{
-       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
-       int err;
-
-       err = mv88e6xxx_g2_smi_phy_wait(chip);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
-       if (err)
-               return err;
-
-       return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
-}
-
-static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
-       .read = mv88e6xxx_g2_smi_phy_read,
-       .write = mv88e6xxx_g2_smi_phy_write,
-};
-
-static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
-       u16 reg;
-       int err;
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
-               /* Consider the frames with reserved multicast destination
-                * addresses matching 01:80:c2:00:00:2x as MGMT.
-                */
-               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X,
-                                     0xffff);
-               if (err)
-                       return err;
-       }
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
-               /* Consider the frames with reserved multicast destination
-                * addresses matching 01:80:c2:00:00:0x as MGMT.
-                */
-               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X,
-                                     0xffff);
-               if (err)
-                       return err;
-       }
-
-       /* Ignore removed tag data on doubly tagged packets, disable
-        * flow control messages, force flow control priority to the
-        * highest, and send all special multicast frames to the CPU
-        * port at the highest priority.
-        */
-       reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
-           mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
-               reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
-       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg);
-       if (err)
-               return err;
-
-       /* Program the DSA routing table. */
-       err = mv88e6xxx_g2_set_device_mapping(chip);
-       if (err)
-               return err;
-
-       /* Clear all trunk masks and mapping. */
-       err = mv88e6xxx_g2_clear_trunk(chip);
-       if (err)
-               return err;
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
-               /* Disable ingress rate limiting by resetting all per port
-                * ingress rate limit resources to their initial state.
-                */
-               err = mv88e6xxx_g2_clear_irl(chip);
-                       if (err)
-                               return err;
-       }
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
-               /* Initialize Cross-chip Port VLAN Table to reset defaults */
-               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
-                                     GLOBAL2_PVT_ADDR_OP_INIT_ONES);
-               if (err)
-                       return err;
-       }
-
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
-               /* Clear the priority override table. */
-               err = mv88e6xxx_g2_clear_pot(chip);
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
 static int mv88e6xxx_setup(struct dsa_switch *ds)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
        int i;
 
@@ -3234,7 +2918,7 @@ unlock:
 
 static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        mutex_lock(&chip->reg_lock);
@@ -3342,7 +3026,7 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip)
 
 static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        u16 val;
        int ret;
 
@@ -3385,7 +3069,7 @@ error:
 
 static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
        u16 val;
        int ret;
@@ -3405,7 +3089,7 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp)
 
 static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP))
                return -EOPNOTSUPP;
@@ -3418,7 +3102,7 @@ static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp)
 
 static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
        u16 val;
        int ret;
@@ -3441,7 +3125,7 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp)
 
 static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
        u16 val;
        int err;
@@ -3464,7 +3148,7 @@ unlock:
 
 static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
        u16 val;
        int ret;
@@ -3488,71 +3172,21 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 
 static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
 
        return chip->eeprom_len;
 }
 
-static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip,
-                                 struct ethtool_eeprom *eeprom, u8 *data)
-{
-       unsigned int offset = eeprom->offset;
-       unsigned int len = eeprom->len;
-       u16 val;
-       int err;
-
-       eeprom->len = 0;
-
-       if (offset & 1) {
-               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-               if (err)
-                       return err;
-
-               *data++ = (val >> 8) & 0xff;
-
-               offset++;
-               len--;
-               eeprom->len++;
-       }
-
-       while (len >= 2) {
-               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-               if (err)
-                       return err;
-
-               *data++ = val & 0xff;
-               *data++ = (val >> 8) & 0xff;
-
-               offset += 2;
-               len -= 2;
-               eeprom->len += 2;
-       }
-
-       if (len) {
-               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-               if (err)
-                       return err;
-
-               *data++ = val & 0xff;
-
-               offset++;
-               len--;
-               eeprom->len++;
-       }
-
-       return 0;
-}
-
 static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
                                struct ethtool_eeprom *eeprom, u8 *data)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        mutex_lock(&chip->reg_lock);
 
        if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
-               err = mv88e6xxx_get_eeprom16(chip, eeprom, data);
+               err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data);
        else
                err = -EOPNOTSUPP;
 
@@ -3566,76 +3200,10 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
        return 0;
 }
 
-static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip,
-                                 struct ethtool_eeprom *eeprom, u8 *data)
-{
-       unsigned int offset = eeprom->offset;
-       unsigned int len = eeprom->len;
-       u16 val;
-       int err;
-
-       /* Ensure the RO WriteEn bit is set */
-       err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val);
-       if (err)
-               return err;
-
-       if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
-               return -EROFS;
-
-       eeprom->len = 0;
-
-       if (offset & 1) {
-               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-               if (err)
-                       return err;
-
-               val = (*data++ << 8) | (val & 0xff);
-
-               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-               if (err)
-                       return err;
-
-               offset++;
-               len--;
-               eeprom->len++;
-       }
-
-       while (len >= 2) {
-               val = *data++;
-               val |= *data++ << 8;
-
-               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-               if (err)
-                       return err;
-
-               offset += 2;
-               len -= 2;
-               eeprom->len += 2;
-       }
-
-       if (len) {
-               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-               if (err)
-                       return err;
-
-               val = (val & 0xff00) | *data++;
-
-               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-               if (err)
-                       return err;
-
-               offset++;
-               len--;
-               eeprom->len++;
-       }
-
-       return 0;
-}
-
 static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
                                struct ethtool_eeprom *eeprom, u8 *data)
 {
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
        if (eeprom->magic != 0xc3ec4951)
@@ -3644,7 +3212,7 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
        mutex_lock(&chip->reg_lock);
 
        if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
-               err = mv88e6xxx_set_eeprom16(chip, eeprom, data);
+               err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data);
        else
                err = -EOPNOTSUPP;
 
@@ -3876,6 +3444,10 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip)
        /* Update the compatible info with the probed one */
        chip->info = info;
 
+       err = mv88e6xxx_g2_require(chip);
+       if (err)
+               return err;
+
        dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n",
                 chip->info->prod_num, chip->info->name, rev);
 
@@ -3897,6 +3469,11 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev)
        return chip;
 }
 
+static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = {
+       .read = mv88e6xxx_g2_smi_phy_read,
+       .write = mv88e6xxx_g2_smi_phy_write,
+};
+
 static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = {
        .read = mv88e6xxx_read,
        .write = mv88e6xxx_write,
@@ -3914,6 +3491,13 @@ static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip)
        }
 }
 
+static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip)
+{
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) {
+               mv88e6xxx_ppu_state_destroy(chip);
+       }
+}
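
mv88e6xxx_phy_destroy() closes a remove-path gap: the PPU state machine
arms a timer, and del_timer_sync() guarantees the handler has finished
before the chip structure goes away. The rule in isolation, as a sketch:

    #include <linux/timer.h>

    struct toy_chip { struct timer_list poll_timer; };

    static void toy_remove(struct toy_chip *chip)
    {
            /* Stop the timer and wait out a running handler, so it
             * cannot dereference 'chip' after this returns.
             */
            del_timer_sync(&chip->poll_timer);
    }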
+
 static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
                              struct mii_bus *bus, int sw_addr)
 {
@@ -3934,6 +3518,16 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
        return 0;
 }
 
+static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA))
+               return DSA_TAG_PROTO_EDSA;
+
+       return DSA_TAG_PROTO_DSA;
+}
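
get_tag_protocol() makes the tagging format a runtime decision: chips with
the EDSA flag keep EtherType DSA tags, the rest fall back to plain DSA
tags. A hypothetical caller of the new hook (the real selection logic
lives in the DSA core):

    #include <linux/printk.h>
    #include <net/dsa.h>

    static void toy_report_tagger(struct dsa_switch *ds)
    {
            enum dsa_tag_protocol proto = ds->ops->get_tag_protocol(ds);

            pr_info("CPU port tagger: %s\n",
                    proto == DSA_TAG_PROTO_EDSA ? "EDSA" : "DSA");
    }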
+
 static const char *mv88e6xxx_drv_probe(struct device *dsa_dev,
                                       struct device *host_dev, int sw_addr,
                                       void **priv)
@@ -3976,9 +3570,61 @@ free:
        return NULL;
 }
 
-static struct dsa_switch_driver mv88e6xxx_switch_driver = {
-       .tag_protocol           = DSA_TAG_PROTO_EDSA,
+static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port,
+                                     const struct switchdev_obj_port_mdb *mdb,
+                                     struct switchdev_trans *trans)
+{
+       /* We don't need any dynamic resource from the kernel (yet),
+        * so skip the prepare phase.
+        */
+
+       return 0;
+}
+
+static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
+                                  const struct switchdev_obj_port_mdb *mdb,
+                                  struct switchdev_trans *trans)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+
+       mutex_lock(&chip->reg_lock);
+       if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
+                                        GLOBAL_ATU_DATA_STATE_MC_STATIC))
+               netdev_err(ds->ports[port].netdev, "failed to load multicast MAC address\n");
+       mutex_unlock(&chip->reg_lock);
+}
+
+static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port,
+                                 const struct switchdev_obj_port_mdb *mdb)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
+                                          GLOBAL_ATU_DATA_STATE_UNUSED);
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
+}
+
+static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port,
+                                  struct switchdev_obj_port_mdb *mdb,
+                                  int (*cb)(struct switchdev_obj *obj))
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_db_dump(chip, port, &mdb->obj, cb);
+       mutex_unlock(&chip->reg_lock);
+
+       return err;
+}
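
The MDB callbacks follow switchdev's two-phase commit: ->prepare runs
first and may fail or reserve resources, while ->add runs in the commit
phase and must not fail, which is why the add path above only logs errors.
A toy sketch of the contract, with a hypothetical toy_program_hw():

    #include <linux/printk.h>

    int toy_program_hw(void);       /* hypothetical ATU write */

    static int toy_mdb_prepare(void)
    {
            return 0;       /* phase 1: nothing to allocate or validate */
    }

    static void toy_mdb_add(void)
    {
            /* phase 2: must not fail; hardware errors can only be logged */
            if (toy_program_hw())
                    pr_err("toy: failed to load multicast entry\n");
    }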
+
+static struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .probe                  = mv88e6xxx_drv_probe,
+       .get_tag_protocol       = mv88e6xxx_get_tag_protocol,
        .setup                  = mv88e6xxx_setup,
        .set_addr               = mv88e6xxx_set_addr,
        .adjust_link            = mv88e6xxx_adjust_link,
@@ -4011,6 +3657,10 @@ static struct dsa_switch_driver mv88e6xxx_switch_driver = {
        .port_fdb_add           = mv88e6xxx_port_fdb_add,
        .port_fdb_del           = mv88e6xxx_port_fdb_del,
        .port_fdb_dump          = mv88e6xxx_port_fdb_dump,
+       .port_mdb_prepare       = mv88e6xxx_port_mdb_prepare,
+       .port_mdb_add           = mv88e6xxx_port_mdb_add,
+       .port_mdb_del           = mv88e6xxx_port_mdb_del,
+       .port_mdb_dump          = mv88e6xxx_port_mdb_dump,
 };
 
 static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip,
@@ -4025,7 +3675,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip,
 
        ds->dev = dev;
        ds->priv = chip;
-       ds->drv = &mv88e6xxx_switch_driver;
+       ds->ops = &mv88e6xxx_switch_ops;
 
        dev_set_drvdata(dev, ds);
 
@@ -4090,8 +3740,9 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 {
        struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
-       struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+       struct mv88e6xxx_chip *chip = ds->priv;
 
+       mv88e6xxx_phy_destroy(chip);
        mv88e6xxx_unregister_switch(chip);
        mv88e6xxx_mdio_unregister(chip);
 }
@@ -4117,7 +3768,7 @@ static struct mdio_driver mv88e6xxx_driver = {
 
 static int __init mv88e6xxx_init(void)
 {
-       register_switch_driver(&mv88e6xxx_switch_driver);
+       register_switch_driver(&mv88e6xxx_switch_ops);
        return mdio_driver_register(&mv88e6xxx_driver);
 }
 module_init(mv88e6xxx_init);
@@ -4125,7 +3776,7 @@ module_init(mv88e6xxx_init);
 static void __exit mv88e6xxx_cleanup(void)
 {
        mdio_driver_unregister(&mv88e6xxx_driver);
-       unregister_switch_driver(&mv88e6xxx_switch_driver);
+       unregister_switch_driver(&mv88e6xxx_switch_ops);
 }
 module_exit(mv88e6xxx_cleanup);
 
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
new file mode 100644 (file)
index 0000000..99ed028
--- /dev/null
@@ -0,0 +1,471 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "global2.h"
+
+/* Offset 0x06: Device Mapping Table register */
+
+static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
+                                            int target, int port)
+{
+       u16 val = (target << 8) | (port & 0xf);
+
+       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
+{
+       int target, port;
+       int err;
+
+       /* Initialize the routing port to the 32 possible target devices */
+       for (target = 0; target < 32; ++target) {
+               port = 0xf;
+
+               if (target < DSA_MAX_SWITCHES) {
+                       port = chip->ds->rtable[target];
+                       if (port == DSA_RTABLE_NONE)
+                               port = 0xf;
+               }
+
+               err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/* Offset 0x07: Trunk Mask Table register */
+
+static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
+                                        bool hask, u16 mask)
+{
+       const u16 port_mask = BIT(chip->info->num_ports) - 1;
+       u16 val = (num << 12) | (mask & port_mask);
+
+       if (hask)
+               val |= GLOBAL2_TRUNK_MASK_HASK;
+
+       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val);
+}
+
+/* Offset 0x08: Trunk Mapping Table register */
+
+static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
+                                           u16 map)
+{
+       const u16 port_mask = BIT(chip->info->num_ports) - 1;
+       u16 val = (id << 11) | (map & port_mask);
+
+       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
+{
+       const u16 port_mask = BIT(chip->info->num_ports) - 1;
+       int i, err;
+
+       /* Clear all eight possible Trunk Mask vectors */
+       for (i = 0; i < 8; ++i) {
+               err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
+               if (err)
+                       return err;
+       }
+
+       /* Clear all sixteen possible Trunk ID routing vectors */
+       for (i = 0; i < 16; ++i) {
+               err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
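
Both trunk helpers pack an index and a port-mask field into a single
16-bit word written through mv88e6xxx_update(). The encoding in isolation,
for an assumed 11-port chip (toy_ names hypothetical):

    #include <linux/bitops.h>
    #include <linux/types.h>

    /* Trunk Mapping word: (id << 11) | member ports, clamped to the
     * chip's valid port bits.
     */
    static u16 toy_trunk_mapping_val(int id, u16 members, int num_ports)
    {
            const u16 port_mask = BIT(num_ports) - 1;

            return (id << 11) | (members & port_mask);
    }

    /* e.g. toy_trunk_mapping_val(3, BIT(0) | BIT(5), 11) == 0x1821 */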
+
+/* Offset 0x09: Ingress Rate Command register
+ * Offset 0x0A: Ingress Rate Data register
+ */
+
+static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
+{
+       int port, err;
+
+       /* Init all Ingress Rate Limit resources of all ports */
+       for (port = 0; port < chip->info->num_ports; ++port) {
+               /* XXX newer chips (like 88E6390) have different 2-bit ops */
+               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+                                     GLOBAL2_IRL_CMD_OP_INIT_ALL |
+                                     (port << 8));
+               if (err)
+                       break;
+
+               /* Wait for the operation to complete */
+               err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
+                                    GLOBAL2_IRL_CMD_BUSY);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/* Offset 0x0D: Switch MAC/WoL/WoF register */
+
+static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
+                                        unsigned int pointer, u8 data)
+{
+       u16 val = (pointer << 8) | data;
+
+       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val);
+}
+
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
+{
+       int i, err;
+
+       for (i = 0; i < 6; i++) {
+               err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/* Offset 0x0F: Priority Override Table */
+
+static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
+                                 u8 data)
+{
+       u16 val = (pointer << 8) | (data & 0x7);
+
+       return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val);
+}
+
+static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+{
+       int i, err;
+
+       /* Clear all sixteen possible Priority Override entries */
+       for (i = 0; i < 16; i++) {
+               err = mv88e6xxx_g2_pot_write(chip, i, 0);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/* Offset 0x14: EEPROM Command
+ * Offset 0x15: EEPROM Data
+ */
+
+static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
+                             GLOBAL2_EEPROM_CMD_BUSY |
+                             GLOBAL2_EEPROM_CMD_RUNNING);
+}
+
+static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+       int err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_eeprom_wait(chip);
+}
+
+static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
+                                     u8 addr, u16 *data)
+{
+       u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
+       int err;
+
+       err = mv88e6xxx_g2_eeprom_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
+}
+
+static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
+                                      u8 addr, u16 data)
+{
+       u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
+       int err;
+
+       err = mv88e6xxx_g2_eeprom_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+                             struct ethtool_eeprom *eeprom, u8 *data)
+{
+       unsigned int offset = eeprom->offset;
+       unsigned int len = eeprom->len;
+       u16 val;
+       int err;
+
+       eeprom->len = 0;
+
+       if (offset & 1) {
+               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+               if (err)
+                       return err;
+
+               *data++ = (val >> 8) & 0xff;
+
+               offset++;
+               len--;
+               eeprom->len++;
+       }
+
+       while (len >= 2) {
+               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+               if (err)
+                       return err;
+
+               *data++ = val & 0xff;
+               *data++ = (val >> 8) & 0xff;
+
+               offset += 2;
+               len -= 2;
+               eeprom->len += 2;
+       }
+
+       if (len) {
+               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+               if (err)
+                       return err;
+
+               *data++ = val & 0xff;
+
+               offset++;
+               len--;
+               eeprom->len++;
+       }
+
+       return 0;
+}
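
mv88e6xxx_g2_get_eeprom16() maps a byte-granular ethtool request onto
16-bit EEPROM words: a leading odd byte is taken from the high half of its
word, the aligned middle is copied two bytes at a time in little-endian
order, and a trailing byte comes from the low half. The offset arithmetic
alone, with a hypothetical read_word() accessor:

    #include <linux/types.h>

    int read_word(u8 addr, u16 *val);       /* hypothetical word accessor */

    static int toy_read_bytes(unsigned int offset, unsigned int len, u8 *data)
    {
            u16 val;
            int err;

            if (offset & 1) {               /* leading unaligned byte */
                    err = read_word(offset >> 1, &val);
                    if (err)
                            return err;
                    *data++ = val >> 8;     /* odd address: high byte */
                    offset++;
                    len--;
            }

            while (len >= 2) {              /* aligned words */
                    err = read_word(offset >> 1, &val);
                    if (err)
                            return err;
                    *data++ = val & 0xff;
                    *data++ = val >> 8;
                    offset += 2;
                    len -= 2;
            }

            if (len) {                      /* trailing byte */
                    err = read_word(offset >> 1, &val);
                    if (err)
                            return err;
                    *data++ = val & 0xff;   /* even address: low byte */
            }

            return 0;
    }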
+
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+                             struct ethtool_eeprom *eeprom, u8 *data)
+{
+       unsigned int offset = eeprom->offset;
+       unsigned int len = eeprom->len;
+       u16 val;
+       int err;
+
+       /* Ensure the RO WriteEn bit is set */
+       err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val);
+       if (err)
+               return err;
+
+       if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
+               return -EROFS;
+
+       eeprom->len = 0;
+
+       if (offset & 1) {
+               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+               if (err)
+                       return err;
+
+               val = (*data++ << 8) | (val & 0xff);
+
+               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+               if (err)
+                       return err;
+
+               offset++;
+               len--;
+               eeprom->len++;
+       }
+
+       while (len >= 2) {
+               val = *data++;
+               val |= *data++ << 8;
+
+               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+               if (err)
+                       return err;
+
+               offset += 2;
+               len -= 2;
+               eeprom->len += 2;
+       }
+
+       if (len) {
+               err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+               if (err)
+                       return err;
+
+               val = (val & 0xff00) | *data++;
+
+               err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+               if (err)
+                       return err;
+
+               offset++;
+               len--;
+               eeprom->len++;
+       }
+
+       return 0;
+}
+
+/* Offset 0x18: SMI PHY Command Register
+ * Offset 0x19: SMI PHY Data Register
+ */
+
+static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD,
+                             GLOBAL2_SMI_PHY_CMD_BUSY);
+}
+
+static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+       int err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_smi_phy_wait(chip);
+}
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+                             u16 *val)
+{
+       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
+       int err;
+
+       err = mv88e6xxx_g2_smi_phy_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+       if (err)
+               return err;
+
+       return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+}
+
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+                              u16 val)
+{
+       u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
+       int err;
+
+       err = mv88e6xxx_g2_smi_phy_wait(chip);
+       if (err)
+               return err;
+
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val);
+       if (err)
+               return err;
+
+       return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+       u16 reg;
+       int err;
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
+               /* Consider the frames with reserved multicast destination
+                * addresses matching 01:80:c2:00:00:2x as MGMT.
+                */
+               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X,
+                                     0xffff);
+               if (err)
+                       return err;
+       }
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
+               /* Consider the frames with reserved multicast destination
+                * addresses matching 01:80:c2:00:00:0x as MGMT.
+                */
+               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X,
+                                     0xffff);
+               if (err)
+                       return err;
+       }
+
+       /* Ignore removed tag data on doubly tagged packets, disable
+        * flow control messages, force flow control priority to the
+        * highest, and send all special multicast frames to the CPU
+        * port at the highest priority.
+        */
+       reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
+           mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
+               reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
+       err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg);
+       if (err)
+               return err;
+
+       /* Program the DSA routing table. */
+       err = mv88e6xxx_g2_set_device_mapping(chip);
+       if (err)
+               return err;
+
+       /* Clear all trunk masks and mapping. */
+       err = mv88e6xxx_g2_clear_trunk(chip);
+       if (err)
+               return err;
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
+               /* Disable ingress rate limiting by resetting all per port
+                * ingress rate limit resources to their initial state.
+                */
+               err = mv88e6xxx_g2_clear_irl(chip);
+               if (err)
+                       return err;
+       }
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
+               /* Initialize Cross-chip Port VLAN Table to reset defaults */
+               err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
+                                     GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+               if (err)
+                       return err;
+       }
+
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
+               /* Clear the priority override table. */
+               err = mv88e6xxx_g2_clear_pot(chip);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
new file mode 100644 (file)
index 0000000..c4bb903
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_GLOBAL2_H
+#define _MV88E6XXX_GLOBAL2_H
+
+#include "mv88e6xxx.h"
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+       return 0;
+}
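+
+/* A usage sketch (an assumption based on the stub in the #else branch
+ * below, not code from this commit): callers are expected to gate their
+ * Global 2 accesses on
+ *
+ *     err = mv88e6xxx_g2_require(chip);
+ *     if (err)
+ *             return err;
+ *
+ * so that chips flagged with MV88E6XXX_FLAG_GLOBAL2 fail cleanly when
+ * this support is compiled out.
+ */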
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+                             u16 *val);
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+                              u16 val);
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+                             struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+                             struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+               dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip,
+                                           int addr, int reg, u16 *val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip,
+                                            int addr, int reg, u16 val)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip,
+                                             u8 *addr)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+                                           struct ethtool_eeprom *eeprom,
+                                           u8 *data)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+                                           struct ethtool_eeprom *eeprom,
+                                           u8 *data)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+       return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+#endif /* _MV88E6XXX_GLOBAL2_H */
index 1f9bab5..52f3f52 100644 (file)
@@ -386,6 +386,12 @@ enum mv88e6xxx_family {
 };
 
 enum mv88e6xxx_cap {
+       /* Two different tag protocols can be used by the driver. All
+        * switches support DSA, but only later generations support
+        * EDSA.
+        */
+       MV88E6XXX_CAP_EDSA,
+
        /* Energy Efficient Ethernet.
         */
        MV88E6XXX_CAP_EEE,
@@ -447,6 +453,7 @@ enum mv88e6xxx_cap {
 };
 
 /* Bitmask of capabilities */
+#define MV88E6XXX_FLAG_EDSA            BIT(MV88E6XXX_CAP_EDSA)
 #define MV88E6XXX_FLAG_EEE             BIT(MV88E6XXX_CAP_EEE)
 
 #define MV88E6XXX_FLAG_SMI_CMD         BIT(MV88E6XXX_CAP_SMI_CMD)
@@ -547,7 +554,8 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAG_VTU)
 
 #define MV88E6XXX_FLAGS_FAMILY_6320    \
-       (MV88E6XXX_FLAG_EEE |           \
+       (MV88E6XXX_FLAG_EDSA |          \
+        MV88E6XXX_FLAG_EEE |           \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
@@ -564,7 +572,8 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6351    \
-       (MV88E6XXX_FLAG_GLOBAL2 |       \
+       (MV88E6XXX_FLAG_EDSA |          \
+        MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
         MV88E6XXX_FLAG_G2_SWITCH_MAC | \
@@ -579,7 +588,8 @@ enum mv88e6xxx_cap {
         MV88E6XXX_FLAGS_SMI_PHY)
 
 #define MV88E6XXX_FLAGS_FAMILY_6352    \
-       (MV88E6XXX_FLAG_EEE |           \
+       (MV88E6XXX_FLAG_EDSA |          \
+        MV88E6XXX_FLAG_EEE |           \
         MV88E6XXX_FLAG_GLOBAL2 |       \
         MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
         MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
@@ -708,4 +718,10 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
        return (chip->info->flags & flags) == flags;
 }
 
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
+                    u16 update);
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
+
 #endif
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
new file mode 100644 (file)
index 0000000..7f3f178
--- /dev/null
@@ -0,0 +1,1060 @@
+/*
+ * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016 John Crispin <john@phrozen.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+#include <net/dsa.h>
+#include <net/switchdev.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/if_bridge.h>
+#include <linux/mdio.h>
+#include <linux/etherdevice.h>
+
+#include "qca8k.h"
+
+#define MIB_DESC(_s, _o, _n)   \
+       {                       \
+               .size = (_s),   \
+               .offset = (_o), \
+               .name = (_n),   \
+       }
+
+static const struct qca8k_mib_desc ar8327_mib[] = {
+       MIB_DESC(1, 0x00, "RxBroad"),
+       MIB_DESC(1, 0x04, "RxPause"),
+       MIB_DESC(1, 0x08, "RxMulti"),
+       MIB_DESC(1, 0x0c, "RxFcsErr"),
+       MIB_DESC(1, 0x10, "RxAlignErr"),
+       MIB_DESC(1, 0x14, "RxRunt"),
+       MIB_DESC(1, 0x18, "RxFragment"),
+       MIB_DESC(1, 0x1c, "Rx64Byte"),
+       MIB_DESC(1, 0x20, "Rx128Byte"),
+       MIB_DESC(1, 0x24, "Rx256Byte"),
+       MIB_DESC(1, 0x28, "Rx512Byte"),
+       MIB_DESC(1, 0x2c, "Rx1024Byte"),
+       MIB_DESC(1, 0x30, "Rx1518Byte"),
+       MIB_DESC(1, 0x34, "RxMaxByte"),
+       MIB_DESC(1, 0x38, "RxTooLong"),
+       MIB_DESC(2, 0x3c, "RxGoodByte"),
+       MIB_DESC(2, 0x44, "RxBadByte"),
+       MIB_DESC(1, 0x4c, "RxOverFlow"),
+       MIB_DESC(1, 0x50, "Filtered"),
+       MIB_DESC(1, 0x54, "TxBroad"),
+       MIB_DESC(1, 0x58, "TxPause"),
+       MIB_DESC(1, 0x5c, "TxMulti"),
+       MIB_DESC(1, 0x60, "TxUnderRun"),
+       MIB_DESC(1, 0x64, "Tx64Byte"),
+       MIB_DESC(1, 0x68, "Tx128Byte"),
+       MIB_DESC(1, 0x6c, "Tx256Byte"),
+       MIB_DESC(1, 0x70, "Tx512Byte"),
+       MIB_DESC(1, 0x74, "Tx1024Byte"),
+       MIB_DESC(1, 0x78, "Tx1518Byte"),
+       MIB_DESC(1, 0x7c, "TxMaxByte"),
+       MIB_DESC(1, 0x80, "TxOverSize"),
+       MIB_DESC(2, 0x84, "TxByte"),
+       MIB_DESC(1, 0x8c, "TxCollision"),
+       MIB_DESC(1, 0x90, "TxAbortCol"),
+       MIB_DESC(1, 0x94, "TxMultiCol"),
+       MIB_DESC(1, 0x98, "TxSingleCol"),
+       MIB_DESC(1, 0x9c, "TxExcDefer"),
+       MIB_DESC(1, 0xa0, "TxDefer"),
+       MIB_DESC(1, 0xa4, "TxLateCol"),
+};
+
+/* The 32bit switch registers are accessed indirectly. To achieve this we need
+ * to set the page of the register. Track the last page that was set to reduce
+ * MDIO writes.
+ */
+static u16 qca8k_current_page = 0xffff;
+
+static void
+qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
+{
+       regaddr >>= 1;
+       *r1 = regaddr & 0x1e;
+
+       regaddr >>= 5;
+       *r2 = regaddr & 0x7;
+
+       regaddr >>= 3;
+       *page = regaddr & 0x3ff;
+}
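+
+/* Worked example, following directly from the shifts above: switch
+ * register 0x660 (QCA8K_PORT_LOOKUP_CTRL for port 0) splits into
+ * r1 = 0x10, r2 = 0x1 and page = 0x3, so one access becomes a page
+ * write of 0x3 followed by 16bit MII operations on address 0x10 | r2
+ * at registers r1 and r1 + 1.
+ */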
+
+static u32
+qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum)
+{
+       u32 val;
+       int ret;
+
+       ret = bus->read(bus, phy_id, regnum);
+       if (ret >= 0) {
+               val = ret;
+               ret = bus->read(bus, phy_id, regnum + 1);
+               val |= ret << 16;
+       }
+
+       if (ret < 0) {
+               dev_err_ratelimited(&bus->dev,
+                                   "failed to read qca8k 32bit register\n");
+               return ret;
+       }
+
+       return val;
+}
+
+static void
+qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
+{
+       u16 lo, hi;
+       int ret;
+
+       lo = val & 0xffff;
+       hi = (u16)(val >> 16);
+
+       ret = bus->write(bus, phy_id, regnum, lo);
+       if (ret >= 0)
+               ret = bus->write(bus, phy_id, regnum + 1, hi);
+       if (ret < 0)
+               dev_err_ratelimited(&bus->dev,
+                                   "failed to write qca8k 32bit register\n");
+}
+
+static void
+qca8k_set_page(struct mii_bus *bus, u16 page)
+{
+       if (page == qca8k_current_page)
+               return;
+
+       if (bus->write(bus, 0x18, 0, page) < 0)
+               dev_err_ratelimited(&bus->dev,
+                                   "failed to set qca8k page\n");
+       qca8k_current_page = page;
+}
+
+static u32
+qca8k_read(struct qca8k_priv *priv, u32 reg)
+{
+       u16 r1, r2, page;
+       u32 val;
+
+       qca8k_split_addr(reg, &r1, &r2, &page);
+
+       mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+       qca8k_set_page(priv->bus, page);
+       val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+
+       mutex_unlock(&priv->bus->mdio_lock);
+
+       return val;
+}
+
+static void
+qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+       u16 r1, r2, page;
+
+       qca8k_split_addr(reg, &r1, &r2, &page);
+
+       mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+       qca8k_set_page(priv->bus, page);
+       qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+
+       mutex_unlock(&priv->bus->mdio_lock);
+}
+
+static u32
+qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
+{
+       u16 r1, r2, page;
+       u32 ret;
+
+       qca8k_split_addr(reg, &r1, &r2, &page);
+
+       mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+       qca8k_set_page(priv->bus, page);
+       ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+       ret &= ~mask;
+       ret |= val;
+       qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret);
+
+       mutex_unlock(&priv->bus->mdio_lock);
+
+       return ret;
+}
+
+static void
+qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+       qca8k_rmw(priv, reg, 0, val);
+}
+
+static void
+qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+       qca8k_rmw(priv, reg, val, 0);
+}
+
+static int
+qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
+
+       *val = qca8k_read(priv, reg);
+
+       return 0;
+}
+
+static int
+qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
+
+       qca8k_write(priv, reg, val);
+
+       return 0;
+}
+
+static const struct regmap_range qca8k_readable_ranges[] = {
+       regmap_reg_range(0x0000, 0x00e4), /* Global control */
+       regmap_reg_range(0x0100, 0x0168), /* EEE control */
+       regmap_reg_range(0x0200, 0x0270), /* Parser control */
+       regmap_reg_range(0x0400, 0x0454), /* ACL */
+       regmap_reg_range(0x0600, 0x0718), /* Lookup */
+       regmap_reg_range(0x0800, 0x0b70), /* QM */
+       regmap_reg_range(0x0c00, 0x0c80), /* PKT */
+       regmap_reg_range(0x0e00, 0x0e98), /* L3 */
+       regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */
+       regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */
+       regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */
+       regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */
+       regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */
+       regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */
+       regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */
+};
+
+static struct regmap_access_table qca8k_readable_table = {
+       .yes_ranges = qca8k_readable_ranges,
+       .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges),
+};
+
+struct regmap_config qca8k_regmap_config = {
+       .reg_bits = 16,
+       .val_bits = 32,
+       .reg_stride = 4,
+       .max_register = 0x16ac, /* end MIB - Port6 range */
+       .reg_read = qca8k_regmap_read,
+       .reg_write = qca8k_regmap_write,
+       .rd_table = &qca8k_readable_table,
+};
+
+static int
+qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
+{
+       unsigned long timeout;
+
+       timeout = jiffies + msecs_to_jiffies(20);
+
+       /* loop until the busy flag has cleared */
+       do {
+               u32 val = qca8k_read(priv, reg);
+               int busy = val & mask;
+
+               if (!busy)
+                       break;
+               cond_resched();
+       } while (!time_after_eq(jiffies, timeout));
+
+       return time_after_eq(jiffies, timeout);
+}
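+
+/* The return value is the timeout condition itself: zero means the busy
+ * flag cleared in time, nonzero means the 20ms budget expired. Callers
+ * use it accordingly, as in qca8k_fdb_access() below:
+ *
+ *     if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY))
+ *             return -1;
+ */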
+
+static void
+qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
+{
+       u32 reg[4];
+       int i;
+
+       /* load the ARL table into an array */
+       for (i = 0; i < 4; i++)
+               reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4));
+
+       /* vid - 83:72 */
+       fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M;
+       /* aging - 67:64 */
+       fdb->aging = reg[2] & QCA8K_ATU_STATUS_M;
+       /* portmask - 54:48 */
+       fdb->port_mask = (reg[1] >> QCA8K_ATU_PORT_S) & QCA8K_ATU_PORT_M;
+       /* mac - 47:0 */
+       fdb->mac[0] = (reg[1] >> QCA8K_ATU_ADDR0_S) & 0xff;
+       fdb->mac[1] = reg[1] & 0xff;
+       fdb->mac[2] = (reg[0] >> QCA8K_ATU_ADDR2_S) & 0xff;
+       fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff;
+       fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff;
+       fdb->mac[5] = reg[0] & 0xff;
+}
+
+static void
+qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
+               u8 aging)
+{
+       u32 reg[3] = { 0 };
+       int i;
+
+       /* vid - 83:72 */
+       reg[2] = (vid & QCA8K_ATU_VID_M) << QCA8K_ATU_VID_S;
+       /* aging - 67:64 */
+       reg[2] |= aging & QCA8K_ATU_STATUS_M;
+       /* portmask - 54:48 */
+       reg[1] = (port_mask & QCA8K_ATU_PORT_M) << QCA8K_ATU_PORT_S;
+       /* mac - 47:0 */
+       reg[1] |= mac[0] << QCA8K_ATU_ADDR0_S;
+       reg[1] |= mac[1];
+       reg[0] |= mac[2] << QCA8K_ATU_ADDR2_S;
+       reg[0] |= mac[3] << QCA8K_ATU_ADDR3_S;
+       reg[0] |= mac[4] << QCA8K_ATU_ADDR4_S;
+       reg[0] |= mac[5];
+
+       /* load the array into the ARL table */
+       for (i = 0; i < 3; i++)
+               qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]);
+}
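+
+/* Illustrative packing, computed from the field layout above: the entry
+ * mac = 00:11:22:33:44:55, vid = 1, port_mask = BIT(0), aging = 0xf
+ * (static) lands in the three ATU data words as
+ *
+ *     reg[2] = 0x0000010f     vid 1, static aging
+ *     reg[1] = 0x00010011     port 0, mac bytes 0-1
+ *     reg[0] = 0x22334455     mac bytes 2-5
+ */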
+
+static int
+qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
+{
+       u32 reg;
+
+       /* Set the command and FDB index */
+       reg = QCA8K_ATU_FUNC_BUSY;
+       reg |= cmd;
+       if (port >= 0) {
+               reg |= QCA8K_ATU_FUNC_PORT_EN;
+               reg |= (port & QCA8K_ATU_FUNC_PORT_M) << QCA8K_ATU_FUNC_PORT_S;
+       }
+
+       /* Write the function register triggering the table access */
+       qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg);
+
+       /* wait for completion */
+       if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY))
+               return -1;
+
+       /* Check for table full violation when adding an entry */
+       if (cmd == QCA8K_FDB_LOAD) {
+               reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC);
+               if (reg & QCA8K_ATU_FUNC_FULL)
+                       return -1;
+       }
+
+       return 0;
+}
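+
+/* The access pattern above mirrors the ATU handshake: data words are
+ * staged via qca8k_fdb_write(), the command plus QCA8K_ATU_FUNC_BUSY is
+ * written to QCA8K_REG_ATU_FUNC, and completion is detected by the BUSY
+ * bit clearing. For QCA8K_FDB_LOAD the register is re-read afterwards
+ * to catch a table-full condition via QCA8K_ATU_FUNC_FULL.
+ */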
+
+static int
+qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port)
+{
+       int ret;
+
+       qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging);
+       ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port);
+       if (ret >= 0)
+               qca8k_fdb_read(priv, fdb);
+
+       return ret;
+}
+
+static int
+qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask,
+             u16 vid, u8 aging)
+{
+       int ret;
+
+       mutex_lock(&priv->reg_mutex);
+       qca8k_fdb_write(priv, vid, port_mask, mac, aging);
+       ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1);
+       mutex_unlock(&priv->reg_mutex);
+
+       return ret;
+}
+
+static int
+qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid)
+{
+       int ret;
+
+       mutex_lock(&priv->reg_mutex);
+       qca8k_fdb_write(priv, vid, port_mask, mac, 0);
+       ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1);
+       mutex_unlock(&priv->reg_mutex);
+
+       return ret;
+}
+
+static void
+qca8k_fdb_flush(struct qca8k_priv *priv)
+{
+       mutex_lock(&priv->reg_mutex);
+       qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1);
+       mutex_unlock(&priv->reg_mutex);
+}
+
+static void
+qca8k_mib_init(struct qca8k_priv *priv)
+{
+       mutex_lock(&priv->reg_mutex);
+       qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+       qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
+       qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
+       qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB);
+       mutex_unlock(&priv->reg_mutex);
+}
+
+static int
+qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode)
+{
+       u32 reg;
+
+       switch (port) {
+       case 0:
+               reg = QCA8K_REG_PORT0_PAD_CTRL;
+               break;
+       case 6:
+               reg = QCA8K_REG_PORT6_PAD_CTRL;
+               break;
+       default:
+               pr_err("Can't set PAD_CTRL on port %d\n", port);
+               return -EINVAL;
+       }
+
+       /* Configure a port to be directly connected to an external
+        * PHY or MAC.
+        */
+       switch (mode) {
+       case PHY_INTERFACE_MODE_RGMII:
+               qca8k_write(priv, reg,
+                           QCA8K_PORT_PAD_RGMII_EN |
+                           QCA8K_PORT_PAD_RGMII_TX_DELAY(3) |
+                           QCA8K_PORT_PAD_RGMII_RX_DELAY(3));
+
+               /* According to the datasheet, RGMII delay is enabled through
+                * PORT5_PAD_CTRL for all ports, rather than individual port
+                * registers.
+                */
+               qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL,
+                           QCA8K_PORT_PAD_RGMII_RX_DELAY_EN);
+               break;
+       case PHY_INTERFACE_MODE_SGMII:
+               qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN);
+               break;
+       default:
+               pr_err("xMII mode %d not supported\n", mode);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static void
+qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
+{
+       u32 mask = QCA8K_PORT_STATUS_TXMAC;
+
+       /* Port 0 and 6 have no internal PHY */
+       if ((port > 0) && (port < 6))
+               mask |= QCA8K_PORT_STATUS_LINK_AUTO;
+
+       if (enable)
+               qca8k_reg_set(priv, QCA8K_REG_PORT_STATUS(port), mask);
+       else
+               qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask);
+}
+
+static int
+qca8k_setup(struct dsa_switch *ds)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       int ret, i, phy_mode = -1;
+
+       /* Make sure that port 0 is the cpu port */
+       if (!dsa_is_cpu_port(ds, 0)) {
+               pr_err("port 0 is not the CPU port\n");
+               return -EINVAL;
+       }
+
+       mutex_init(&priv->reg_mutex);
+
+       /* Start by setting up the register mapping */
+       priv->regmap = devm_regmap_init(ds->dev, NULL, priv,
+                                       &qca8k_regmap_config);
+       if (IS_ERR(priv->regmap))
+               pr_warn("regmap initialization failed");
+
+       /* Initialize CPU port pad mode (xMII type, delays...) */
+       phy_mode = of_get_phy_mode(ds->ports[ds->dst->cpu_port].dn);
+       if (phy_mode < 0) {
+               pr_err("Can't find phy-mode for master device\n");
+               return phy_mode;
+       }
+       ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode);
+       if (ret < 0)
+               return ret;
+
+       /* Enable CPU Port */
+       qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
+                     QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
+       qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1);
+       priv->port_sts[QCA8K_CPU_PORT].enabled = 1;
+
+       /* Enable MIB counters */
+       qca8k_mib_init(priv);
+
+       /* Enable QCA header mode on the cpu port */
+       qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT),
+                   QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S |
+                   QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S);
+
+       /* Disable forwarding by default on all ports */
+       for (i = 0; i < QCA8K_NUM_PORTS; i++)
+               qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+                         QCA8K_PORT_LOOKUP_MEMBER, 0);
+
+       /* Disable MAC by default on all user ports */
+       for (i = 1; i < QCA8K_NUM_PORTS; i++)
+               if (ds->enabled_port_mask & BIT(i))
+                       qca8k_port_set_status(priv, i, 0);
+
+       /* Forward all unknown frames to CPU port for Linux processing */
+       qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1,
+                   BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S |
+                   BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S |
+                   BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S |
+                   BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S);
+
+       /* Setup connection between CPU port & user ports */
+       for (i = 0; i < DSA_MAX_PORTS; i++) {
+               /* CPU port gets connected to all user ports of the switch */
+               if (dsa_is_cpu_port(ds, i)) {
+                       qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT),
+                                 QCA8K_PORT_LOOKUP_MEMBER,
+                                 ds->enabled_port_mask);
+               }
+
+               /* Individual user ports get connected to the CPU port only */
+               if (ds->enabled_port_mask & BIT(i)) {
+                       int shift = 16 * (i % 2);
+
+                       qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+                                 QCA8K_PORT_LOOKUP_MEMBER,
+                                 BIT(QCA8K_CPU_PORT));
+
+                       /* Enable ARP Auto-learning by default */
+                       qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+                                     QCA8K_PORT_LOOKUP_LEARN);
+
+                       /* For port-based VLANs to work we need to set the
+                        * default egress VID.
+                        */
+                       qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i),
+                                 0xffff << shift, 1 << shift);
+                       qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i),
+                                   QCA8K_PORT_VLAN_CVID(1) |
+                                   QCA8K_PORT_VLAN_SVID(1));
+               }
+       }
+
+       /* Flush the FDB table */
+       qca8k_fdb_flush(priv);
+
+       return 0;
+}
+
+static int
+qca8k_set_addr(struct dsa_switch *ds, u8 *addr)
+{
+       /* The subsystem always calls this function so add an empty stub */
+       return 0;
+}
+
+static int
+qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+       return mdiobus_read(priv->bus, phy, regnum);
+}
+
+static int
+qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+       return mdiobus_write(priv->bus, phy, regnum, val);
+}
+
+static void
+qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++)
+               strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name,
+                       ETH_GSTRING_LEN);
+}
+
+static void
+qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
+                       uint64_t *data)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       const struct qca8k_mib_desc *mib;
+       u32 reg, i;
+       u64 hi;
+
+       for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) {
+               mib = &ar8327_mib[i];
+               reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset;
+
+               data[i] = qca8k_read(priv, reg);
+               if (mib->size == 2) {
+                       hi = qca8k_read(priv, reg + 4);
+                       data[i] |= hi << 32;
+               }
+       }
+}
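+
+/* For a 64bit counter such as RxGoodByte (size 2, offset 0x3c) the loop
+ * above issues two 32bit reads: the low word at the counter offset and
+ * the high word 4 bytes later, combined as low | (hi << 32).
+ */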
+
+static int
+qca8k_get_sset_count(struct dsa_switch *ds)
+{
+       return ARRAY_SIZE(ar8327_mib);
+}
+
+static void
+qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
+       u32 reg;
+
+       mutex_lock(&priv->reg_mutex);
+       reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
+       if (enable)
+               reg |= lpi_en;
+       else
+               reg &= ~lpi_en;
+       qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
+       mutex_unlock(&priv->reg_mutex);
+}
+
+static int
+qca8k_eee_init(struct dsa_switch *ds, int port,
+              struct phy_device *phy)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct ethtool_eee *p = &priv->port_sts[port].eee;
+       int ret;
+
+       p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
+
+       /* Return 1 when EEE was successfully brought up and 0 otherwise,
+        * since qca8k_set_eee() assigns the result directly to
+        * p->eee_enabled.
+        */
+       ret = phy_init_eee(phy, 0);
+       if (ret)
+               return 0;
+
+       qca8k_eee_enable_set(ds, port, true);
+
+       return 1;
+}
+
+static int
+qca8k_set_eee(struct dsa_switch *ds, int port,
+             struct phy_device *phydev,
+             struct ethtool_eee *e)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct ethtool_eee *p = &priv->port_sts[port].eee;
+       int ret = 0;
+
+       p->eee_enabled = e->eee_enabled;
+
+       if (e->eee_enabled) {
+               p->eee_enabled = qca8k_eee_init(ds, port, phydev);
+               if (!p->eee_enabled)
+                       ret = -EOPNOTSUPP;
+       }
+       qca8k_eee_enable_set(ds, port, p->eee_enabled);
+
+       return ret;
+}
+
+static int
+qca8k_get_eee(struct dsa_switch *ds, int port,
+             struct ethtool_eee *e)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct ethtool_eee *p = &priv->port_sts[port].eee;
+       struct net_device *netdev = ds->ports[port].netdev;
+       int ret;
+
+       ret = phy_ethtool_get_eee(netdev->phydev, p);
+       if (!ret)
+               e->eee_active =
+                       !!(p->supported & p->advertised & p->lp_advertised);
+       else
+               e->eee_active = 0;
+
+       e->eee_enabled = p->eee_enabled;
+
+       return ret;
+}
+
+static void
+qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       u32 stp_state;
+
+       switch (state) {
+       case BR_STATE_DISABLED:
+               stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED;
+               break;
+       case BR_STATE_BLOCKING:
+               stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING;
+               break;
+       case BR_STATE_LISTENING:
+               stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING;
+               break;
+       case BR_STATE_LEARNING:
+               stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING;
+               break;
+       case BR_STATE_FORWARDING:
+       default:
+               stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD;
+               break;
+       }
+
+       qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+                 QCA8K_PORT_LOOKUP_STATE_MASK, stp_state);
+}
+
+static int
+qca8k_port_bridge_join(struct dsa_switch *ds, int port,
+                      struct net_device *bridge)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       int port_mask = BIT(QCA8K_CPU_PORT);
+       int i;
+
+       priv->port_sts[port].bridge_dev = bridge;
+
+       for (i = 1; i < QCA8K_NUM_PORTS; i++) {
+               if (priv->port_sts[i].bridge_dev != bridge)
+                       continue;
+               /* Add this port to the portvlan mask of the other ports
+                * in the bridge
+                */
+               qca8k_reg_set(priv,
+                             QCA8K_PORT_LOOKUP_CTRL(i),
+                             BIT(port));
+               if (i != port)
+                       port_mask |= BIT(i);
+       }
+       /* Add all other bridged ports to this port's portvlan mask */
+       qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+                 QCA8K_PORT_LOOKUP_MEMBER, port_mask);
+
+       return 0;
+}
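+
+/* Illustrative outcome, derived from the loop above: bridging ports 1
+ * and 2 leaves port 1 with a portvlan mask of BIT(QCA8K_CPU_PORT) |
+ * BIT(2) and port 2 with BIT(QCA8K_CPU_PORT) | BIT(1); the CPU port is
+ * always a member via port_mask's initial value.
+ */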
+
+static void
+qca8k_port_bridge_leave(struct dsa_switch *ds, int port)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       int i;
+
+       for (i = 1; i < QCA8K_NUM_PORTS; i++) {
+               if (priv->port_sts[i].bridge_dev !=
+                   priv->port_sts[port].bridge_dev)
+                       continue;
+               /* Remove this port from the portvlan mask of the other
+                * ports in the bridge.
+                */
+               qca8k_reg_clear(priv,
+                               QCA8K_PORT_LOOKUP_CTRL(i),
+                               BIT(port));
+       }
+       priv->port_sts[port].bridge_dev = NULL;
+       /* Set the cpu port to be the only one in the portvlan mask of
+        * this port
+        */
+       qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+                 QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT));
+}
+
+static int
+qca8k_port_enable(struct dsa_switch *ds, int port,
+                 struct phy_device *phy)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+       qca8k_port_set_status(priv, port, 1);
+       priv->port_sts[port].enabled = 1;
+
+       return 0;
+}
+
+static void
+qca8k_port_disable(struct dsa_switch *ds, int port,
+                  struct phy_device *phy)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+       qca8k_port_set_status(priv, port, 0);
+       priv->port_sts[port].enabled = 0;
+}
+
+static int
+qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
+                     u16 port_mask, u16 vid)
+{
+       /* Set the vid to the port vlan id if no vid is set */
+       if (!vid)
+               vid = 1;
+
+       return qca8k_fdb_add(priv, addr, port_mask, vid,
+                            QCA8K_ATU_STATUS_STATIC);
+}
+
+static int
+qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
+                      const struct switchdev_obj_port_fdb *fdb,
+                      struct switchdev_trans *trans)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+       /* The FDB table for static and auto-learned entries is the same. We
+        * need to reserve an entry with no port_mask set to make sure that
+        * when port_fdb_add is called an entry is still available. Otherwise
+        * the last free entry might have been used up by auto-learning.
+        */
+       return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid);
+}
+
+static void
+qca8k_port_fdb_add(struct dsa_switch *ds, int port,
+                  const struct switchdev_obj_port_fdb *fdb,
+                  struct switchdev_trans *trans)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       u16 port_mask = BIT(port);
+
+       /* Update the FDB entry adding the port_mask */
+       qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid);
+}
+
+static int
+qca8k_port_fdb_del(struct dsa_switch *ds, int port,
+                  const struct switchdev_obj_port_fdb *fdb)
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       u16 port_mask = BIT(port);
+       u16 vid = fdb->vid;
+
+       if (!vid)
+               vid = 1;
+
+       return qca8k_fdb_del(priv, fdb->addr, port_mask, vid);
+}
+
+static int
+qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
+                   struct switchdev_obj_port_fdb *fdb,
+                   int (*cb)(struct switchdev_obj *obj))
+{
+       struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+       struct qca8k_fdb _fdb = { 0 };
+       int cnt = QCA8K_NUM_FDB_RECORDS;
+       int ret = 0;
+
+       mutex_lock(&priv->reg_mutex);
+       while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) {
+               if (!_fdb.aging)
+                       break;
+
+               ether_addr_copy(fdb->addr, _fdb.mac);
+               fdb->vid = _fdb.vid;
+               if (_fdb.aging == QCA8K_ATU_STATUS_STATIC)
+                       fdb->ndm_state = NUD_NOARP;
+               else
+                       fdb->ndm_state = NUD_REACHABLE;
+
+               ret = cb(&fdb->obj);
+               if (ret)
+                       break;
+       }
+       mutex_unlock(&priv->reg_mutex);
+
+       return ret;
+}
+
+static enum dsa_tag_protocol
+qca8k_get_tag_protocol(struct dsa_switch *ds)
+{
+       return DSA_TAG_PROTO_QCA;
+}
+
+static struct dsa_switch_ops qca8k_switch_ops = {
+       .get_tag_protocol       = qca8k_get_tag_protocol,
+       .setup                  = qca8k_setup,
+       .set_addr               = qca8k_set_addr,
+       .get_strings            = qca8k_get_strings,
+       .phy_read               = qca8k_phy_read,
+       .phy_write              = qca8k_phy_write,
+       .get_ethtool_stats      = qca8k_get_ethtool_stats,
+       .get_sset_count         = qca8k_get_sset_count,
+       .get_eee                = qca8k_get_eee,
+       .set_eee                = qca8k_set_eee,
+       .port_enable            = qca8k_port_enable,
+       .port_disable           = qca8k_port_disable,
+       .port_stp_state_set     = qca8k_port_stp_state_set,
+       .port_bridge_join       = qca8k_port_bridge_join,
+       .port_bridge_leave      = qca8k_port_bridge_leave,
+       .port_fdb_prepare       = qca8k_port_fdb_prepare,
+       .port_fdb_add           = qca8k_port_fdb_add,
+       .port_fdb_del           = qca8k_port_fdb_del,
+       .port_fdb_dump          = qca8k_port_fdb_dump,
+};
+
+static int
+qca8k_sw_probe(struct mdio_device *mdiodev)
+{
+       struct qca8k_priv *priv;
+       u32 id;
+
+       /* Allocate the private data struct so that we can probe the
+        * switch's ID register.
+        */
+       priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->bus = mdiodev->bus;
+
+       /* Read the switch's ID register */
+       id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
+       id >>= QCA8K_MASK_CTRL_ID_S;
+       id &= QCA8K_MASK_CTRL_ID_M;
+       if (id != QCA8K_ID_QCA8337)
+               return -ENODEV;
+
+       priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
+       if (!priv->ds)
+               return -ENOMEM;
+
+       priv->ds->priv = priv;
+       priv->ds->dev = &mdiodev->dev;
+       priv->ds->ops = &qca8k_switch_ops;
+       mutex_init(&priv->reg_mutex);
+       dev_set_drvdata(&mdiodev->dev, priv);
+
+       return dsa_register_switch(priv->ds, priv->ds->dev->of_node);
+}
+
+static void
+qca8k_sw_remove(struct mdio_device *mdiodev)
+{
+       struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev);
+       int i;
+
+       for (i = 0; i < QCA8K_NUM_PORTS; i++)
+               qca8k_port_set_status(priv, i, 0);
+
+       dsa_unregister_switch(priv->ds);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void
+qca8k_set_pm(struct qca8k_priv *priv, int enable)
+{
+       int i;
+
+       for (i = 0; i < QCA8K_NUM_PORTS; i++) {
+               if (!priv->port_sts[i].enabled)
+                       continue;
+
+               qca8k_port_set_status(priv, i, enable);
+       }
+}
+
+static int qca8k_suspend(struct device *dev)
+{
+       /* The switch probes as an MDIO device, so take the driver data
+        * straight from the struct device instead of going through a
+        * platform_device cast that does not match the probe path.
+        */
+       struct qca8k_priv *priv = dev_get_drvdata(dev);
+
+       qca8k_set_pm(priv, 0);
+
+       return dsa_switch_suspend(priv->ds);
+}
+
+static int qca8k_resume(struct device *dev)
+{
+       struct qca8k_priv *priv = dev_get_drvdata(dev);
+
+       qca8k_set_pm(priv, 1);
+
+       return dsa_switch_resume(priv->ds);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(qca8k_pm_ops,
+                        qca8k_suspend, qca8k_resume);
+
+static const struct of_device_id qca8k_of_match[] = {
+       { .compatible = "qca,qca8337" },
+       { /* sentinel */ },
+};
+
+static struct mdio_driver qca8kmdio_driver = {
+       .probe  = qca8k_sw_probe,
+       .remove = qca8k_sw_remove,
+       .mdiodrv.driver = {
+               .name = "qca8k",
+               .of_match_table = qca8k_of_match,
+               .pm = &qca8k_pm_ops,
+       },
+};
+
+static int __init
+qca8kmdio_driver_register(void)
+{
+       return mdio_driver_register(&qca8kmdio_driver);
+}
+module_init(qca8kmdio_driver_register);
+
+static void __exit
+qca8kmdio_driver_unregister(void)
+{
+       mdio_driver_unregister(&qca8kmdio_driver);
+}
+module_exit(qca8kmdio_driver_unregister);
+
+MODULE_AUTHOR("Mathieu Olivari, John Crispin <john@phrozen.org>");
+MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:qca8k");
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
new file mode 100644 (file)
index 0000000..2014647
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCA8K_H
+#define __QCA8K_H
+
+#include <linux/delay.h>
+#include <linux/regmap.h>
+
+#define QCA8K_NUM_PORTS                                        7
+
+#define PHY_ID_QCA8337                                 0x004dd036
+#define QCA8K_ID_QCA8337                               0x13
+
+#define QCA8K_NUM_FDB_RECORDS                          2048
+
+#define QCA8K_CPU_PORT                                 0
+
+/* Global control registers */
+#define QCA8K_REG_MASK_CTRL                            0x000
+#define   QCA8K_MASK_CTRL_ID_M                         0xff
+#define   QCA8K_MASK_CTRL_ID_S                         8
+#define QCA8K_REG_PORT0_PAD_CTRL                       0x004
+#define QCA8K_REG_PORT5_PAD_CTRL                       0x008
+#define QCA8K_REG_PORT6_PAD_CTRL                       0x00c
+#define   QCA8K_PORT_PAD_RGMII_EN                      BIT(26)
+#define   QCA8K_PORT_PAD_RGMII_TX_DELAY(x)             \
+                                               ((0x8 + (x & 0x3)) << 22)
+#define   QCA8K_PORT_PAD_RGMII_RX_DELAY(x)             \
+                                               ((0x10 + (x & 0x3)) << 20)
+#define   QCA8K_PORT_PAD_RGMII_RX_DELAY_EN             BIT(24)
+#define   QCA8K_PORT_PAD_SGMII_EN                      BIT(7)
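+
+/* Worked values: QCA8K_PORT_PAD_RGMII_TX_DELAY(3) expands to 0xb << 22
+ * and QCA8K_PORT_PAD_RGMII_RX_DELAY(3) to 0x13 << 20; note that the
+ * 0x10 << 20 term of the latter is the same bit as
+ * QCA8K_PORT_PAD_RGMII_RX_DELAY_EN (BIT(24)), so the delay macros carry
+ * an enable bit alongside the 2bit delay value.
+ */
+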
+#define QCA8K_REG_MODULE_EN                            0x030
+#define   QCA8K_MODULE_EN_MIB                          BIT(0)
+#define QCA8K_REG_MIB                                  0x034
+#define   QCA8K_MIB_FLUSH                              BIT(24)
+#define   QCA8K_MIB_CPU_KEEP                           BIT(20)
+#define   QCA8K_MIB_BUSY                               BIT(17)
+#define QCA8K_GOL_MAC_ADDR0                            0x60
+#define QCA8K_GOL_MAC_ADDR1                            0x64
+#define QCA8K_REG_PORT_STATUS(_i)                      (0x07c + (_i) * 4)
+#define   QCA8K_PORT_STATUS_SPEED                      GENMASK(2, 0)
+#define   QCA8K_PORT_STATUS_SPEED_S                    0
+#define   QCA8K_PORT_STATUS_TXMAC                      BIT(2)
+#define   QCA8K_PORT_STATUS_RXMAC                      BIT(3)
+#define   QCA8K_PORT_STATUS_TXFLOW                     BIT(4)
+#define   QCA8K_PORT_STATUS_RXFLOW                     BIT(5)
+#define   QCA8K_PORT_STATUS_DUPLEX                     BIT(6)
+#define   QCA8K_PORT_STATUS_LINK_UP                    BIT(8)
+#define   QCA8K_PORT_STATUS_LINK_AUTO                  BIT(9)
+#define   QCA8K_PORT_STATUS_LINK_PAUSE                 BIT(10)
+#define QCA8K_REG_PORT_HDR_CTRL(_i)                    (0x9c + (_i * 4))
+#define   QCA8K_PORT_HDR_CTRL_RX_MASK                  GENMASK(3, 2)
+#define   QCA8K_PORT_HDR_CTRL_RX_S                     2
+#define   QCA8K_PORT_HDR_CTRL_TX_MASK                  GENMASK(1, 0)
+#define   QCA8K_PORT_HDR_CTRL_TX_S                     0
+#define   QCA8K_PORT_HDR_CTRL_ALL                      2
+#define   QCA8K_PORT_HDR_CTRL_MGMT                     1
+#define   QCA8K_PORT_HDR_CTRL_NONE                     0
+
+/* EEE control registers */
+#define QCA8K_REG_EEE_CTRL                             0x100
+#define  QCA8K_REG_EEE_CTRL_LPI_EN(_i)                 ((_i + 1) * 2)
+
+/* ACL registers */
+#define QCA8K_REG_PORT_VLAN_CTRL0(_i)                  (0x420 + (_i * 8))
+#define   QCA8K_PORT_VLAN_CVID(x)                      (x << 16)
+#define   QCA8K_PORT_VLAN_SVID(x)                      x
+#define QCA8K_REG_PORT_VLAN_CTRL1(_i)                  (0x424 + (_i * 8))
+#define QCA8K_REG_IPV4_PRI_BASE_ADDR                   0x470
+#define QCA8K_REG_IPV4_PRI_ADDR_MASK                   0x474
+
+/* Lookup registers */
+#define QCA8K_REG_ATU_DATA0                            0x600
+#define   QCA8K_ATU_ADDR2_S                            24
+#define   QCA8K_ATU_ADDR3_S                            16
+#define   QCA8K_ATU_ADDR4_S                            8
+#define QCA8K_REG_ATU_DATA1                            0x604
+#define   QCA8K_ATU_PORT_M                             0x7f
+#define   QCA8K_ATU_PORT_S                             16
+#define   QCA8K_ATU_ADDR0_S                            8
+#define QCA8K_REG_ATU_DATA2                            0x608
+#define   QCA8K_ATU_VID_M                              0xfff
+#define   QCA8K_ATU_VID_S                              8
+#define   QCA8K_ATU_STATUS_M                           0xf
+#define   QCA8K_ATU_STATUS_STATIC                      0xf
+#define QCA8K_REG_ATU_FUNC                             0x60c
+#define   QCA8K_ATU_FUNC_BUSY                          BIT(31)
+#define   QCA8K_ATU_FUNC_PORT_EN                       BIT(14)
+#define   QCA8K_ATU_FUNC_MULTI_EN                      BIT(13)
+#define   QCA8K_ATU_FUNC_FULL                          BIT(12)
+#define   QCA8K_ATU_FUNC_PORT_M                                0xf
+#define   QCA8K_ATU_FUNC_PORT_S                                8
+#define QCA8K_REG_GLOBAL_FW_CTRL0                      0x620
+#define   QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN            BIT(10)
+#define QCA8K_REG_GLOBAL_FW_CTRL1                      0x624
+#define   QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S              24
+#define   QCA8K_GLOBAL_FW_CTRL1_BC_DP_S                        16
+#define   QCA8K_GLOBAL_FW_CTRL1_MC_DP_S                        8
+#define   QCA8K_GLOBAL_FW_CTRL1_UC_DP_S                        0
+#define QCA8K_PORT_LOOKUP_CTRL(_i)                     (0x660 + (_i) * 0xc)
+#define   QCA8K_PORT_LOOKUP_MEMBER                     GENMASK(6, 0)
+#define   QCA8K_PORT_LOOKUP_STATE_MASK                 GENMASK(18, 16)
+#define   QCA8K_PORT_LOOKUP_STATE_DISABLED             (0 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_BLOCKING             (1 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_LISTENING            (2 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_LEARNING             (3 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_FORWARD              (4 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE                      GENMASK(18, 16)
+#define   QCA8K_PORT_LOOKUP_LEARN                      BIT(20)
+
+/* Pkt edit registers */
+#define QCA8K_EGRESS_VLAN(x)                           (0x0c70 + (4 * (x / 2)))
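+/* Worked example, following the arithmetic above: port 3 lives in the
+ * register at 0x0c70 + 4 * (3 / 2) = 0x0c74, and qca8k_setup() selects
+ * its 16bit half with shift = 16 * (3 % 2) = 16.
+ */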
+
+/* L3 registers */
+#define QCA8K_HROUTER_CONTROL                          0xe00
+#define   QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_M         GENMASK(17, 16)
+#define   QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_S         16
+#define   QCA8K_HROUTER_CONTROL_ARP_AGE_MODE           1
+#define QCA8K_HROUTER_PBASED_CONTROL1                  0xe08
+#define QCA8K_HROUTER_PBASED_CONTROL2                  0xe0c
+#define QCA8K_HNAT_CONTROL                             0xe38
+
+/* MIB registers */
+#define QCA8K_PORT_MIB_COUNTER(_i)                     (0x1000 + (_i) * 0x100)
+
+/* QCA specific MII registers */
+#define MII_ATH_MMD_ADDR                               0x0d
+#define MII_ATH_MMD_DATA                               0x0e
+
+enum {
+       QCA8K_PORT_SPEED_10M = 0,
+       QCA8K_PORT_SPEED_100M = 1,
+       QCA8K_PORT_SPEED_1000M = 2,
+       QCA8K_PORT_SPEED_ERR = 3,
+};
+
+enum qca8k_fdb_cmd {
+       QCA8K_FDB_FLUSH = 1,
+       QCA8K_FDB_LOAD = 2,
+       QCA8K_FDB_PURGE = 3,
+       QCA8K_FDB_NEXT = 6,
+       QCA8K_FDB_SEARCH = 7,
+};
+
+struct ar8xxx_port_status {
+       struct ethtool_eee eee;
+       struct net_device *bridge_dev;
+       int enabled;
+};
+
+struct qca8k_priv {
+       struct regmap *regmap;
+       struct mii_bus *bus;
+       struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];
+       struct dsa_switch *ds;
+       struct mutex reg_mutex;
+};
+
+struct qca8k_mib_desc {
+       unsigned int size;
+       unsigned int offset;
+       const char *name;
+};
+
+struct qca8k_fdb {
+       u16 vid;
+       u8 port_mask;
+       u8 aging;
+       u8 mac[6];
+};
+
+#endif /* __QCA8K_H */
index 25c55ab..9133e79 100644 (file)
@@ -3089,7 +3089,7 @@ static void set_rx_mode(struct net_device *dev)
        iowrite16(new_mode, ioaddr + EL3_CMD);
 }
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 /* Setup the card so that it can receive frames with an 802.1q VLAN tag.
    Note that this must be done after each RxReset due to some backwards
    compatibility logic in the Cyclone and Tornado ASICs */
index 1d10696..8af2c88 100644 (file)
@@ -66,7 +66,7 @@
  */
 #define ZEROCOPY
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define VLAN_SUPPORT
 #endif
 
index c83ebae..9066838 100644 (file)
@@ -2961,7 +2961,7 @@ static void et131x_get_drvinfo(struct net_device *netdev,
                sizeof(info->bus_info));
 }
 
-static struct ethtool_ops et131x_ethtool_ops = {
+static const struct ethtool_ops et131x_ethtool_ops = {
        .get_drvinfo    = et131x_get_drvinfo,
        .get_regs_len   = et131x_get_regs_len,
        .get_regs       = et131x_get_regs,
index 5c536b8..bfeaec5 100644 (file)
@@ -2681,8 +2681,8 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev,
        return io_queue_num;
 }
 
-static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
-                            struct ena_com_dev_get_features_ctx *get_feat_ctx)
+static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+                             struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
        bool has_mem_bar;
 
@@ -2693,8 +2693,6 @@ static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
        else
                ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
-
-       return 0;
 }
 
 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
@@ -2913,11 +2911,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_free_region;
        }
 
-       rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
-       if (rc) {
-               dev_err(&pdev->dev, "Invalid module param(push_mode)\n");
-               goto err_device_destroy;
-       }
+       ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
 
        if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
                ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
index dcf2a1f..dc57f27 100644 (file)
 #define WRITERDP(lp, x)        out_be16(lp->base + LANCE_RDP, (x))
 #define READRDP(lp)    in_be16(lp->base + LANCE_RDP)
 
-#if defined(CONFIG_HPLANCE) || defined(CONFIG_HPLANCE_MODULE)
+#if IS_ENABLED(CONFIG_HPLANCE)
 #include "hplance.h"
 
 #undef WRITERAP
 #undef WRITERDP
 #undef READRDP
 
-#if defined(CONFIG_MVME147_NET) || defined(CONFIG_MVME147_NET_MODULE)
+#if IS_ENABLED(CONFIG_MVME147_NET)
 
 /* Lossage Factor Nine, Mr Sulu. */
 #define WRITERAP(lp, x)        (lp->writerap(lp, x))
@@ -86,7 +86,7 @@ static inline __u16 READRDP(struct lance_private *lp)
 }
 
 #endif
-#endif /* CONFIG_HPLANCE || CONFIG_HPLANCE_MODULE */
+#endif /* IS_ENABLED(CONFIG_HPLANCE) */
 
 /* debugging output macros, various flavours */
 /* #define TEST_HITS */
index 9496005..f92cc97 100644 (file)
@@ -89,7 +89,7 @@ Revision History:
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define AMD8111E_VLAN_TAG_USED 1
 #else
 #define AMD8111E_VLAN_TAG_USED 0
index a9b2709..7f9216d 100644 (file)
@@ -1708,9 +1708,9 @@ static const struct net_device_ops xgbe_netdev_ops = {
        .ndo_set_features       = xgbe_set_features,
 };
 
-struct net_device_ops *xgbe_get_netdev_ops(void)
+const struct net_device_ops *xgbe_get_netdev_ops(void)
 {
-       return (struct net_device_ops *)&xgbe_netdev_ops;
+       return &xgbe_netdev_ops;
 }
 
 static void xgbe_rx_refresh(struct xgbe_channel *channel)
index 11d9f0c..4007b42 100644 (file)
@@ -623,7 +623,7 @@ static const struct ethtool_ops xgbe_ethtool_ops = {
        .get_ts_info = xgbe_get_ts_info,
 };
 
-struct ethtool_ops *xgbe_get_ethtool_ops(void)
+const struct ethtool_ops *xgbe_get_ethtool_ops(void)
 {
-       return (struct ethtool_ops *)&xgbe_ethtool_ops;
+       return &xgbe_ethtool_ops;
 }
index 3eee320..9de0788 100644 (file)
@@ -861,9 +861,15 @@ static int xgbe_resume(struct device *dev)
        pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
        XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
 
-       if (netif_running(netdev))
+       if (netif_running(netdev)) {
                ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
 
+               /* Schedule a restart in case the link or phy state changed
+                * while we were powered down.
+                */
+               schedule_work(&pdata->restart_work);
+       }
+
        DBGPR("<--xgbe_resume\n");
 
        return ret;
index 98d9d63..5dd17dc 100644 (file)
@@ -956,8 +956,9 @@ struct xgbe_prv_data {
 void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *);
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *);
 void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *);
-struct net_device_ops *xgbe_get_netdev_ops(void);
-struct ethtool_ops *xgbe_get_ethtool_ops(void);
+const struct net_device_ops *xgbe_get_netdev_ops(void);
+const struct ethtool_ops *xgbe_get_ethtool_ops(void);
+
 #ifdef CONFIG_AMD_XGBE_DCB
 const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void);
 #endif
index 22a7b26..d372d42 100644 (file)
@@ -54,55 +54,68 @@ static void xgene_get_drvinfo(struct net_device *ndev,
        sprintf(info->bus_info, "%s", pdev->name);
 }
 
-static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+static int xgene_get_link_ksettings(struct net_device *ndev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-       struct phy_device *phydev = pdata->phy_dev;
+       struct phy_device *phydev = ndev->phydev;
+       u32 supported;
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
                if (phydev == NULL)
                        return -ENODEV;
 
-               return phy_ethtool_gset(phydev, cmd);
+               return phy_ethtool_ksettings_get(phydev, cmd);
        } else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
                if (pdata->mdio_driver) {
                        if (!phydev)
                                return -ENODEV;
 
-                       return phy_ethtool_gset(phydev, cmd);
+                       return phy_ethtool_ksettings_get(phydev, cmd);
                }
 
-               cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
-                                SUPPORTED_MII;
-               cmd->advertising = cmd->supported;
-               ethtool_cmd_speed_set(cmd, SPEED_1000);
-               cmd->duplex = DUPLEX_FULL;
-               cmd->port = PORT_MII;
-               cmd->transceiver = XCVR_INTERNAL;
-               cmd->autoneg = AUTONEG_ENABLE;
+               supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
+                       SUPPORTED_MII;
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.supported,
+                       supported);
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.advertising,
+                       supported);
+
+               cmd->base.speed = SPEED_1000;
+               cmd->base.duplex = DUPLEX_FULL;
+               cmd->base.port = PORT_MII;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        } else {
-               cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE;
-               cmd->advertising = cmd->supported;
-               ethtool_cmd_speed_set(cmd, SPEED_10000);
-               cmd->duplex = DUPLEX_FULL;
-               cmd->port = PORT_FIBRE;
-               cmd->transceiver = XCVR_INTERNAL;
-               cmd->autoneg = AUTONEG_DISABLE;
+               supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE;
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.supported,
+                       supported);
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.advertising,
+                       supported);
+
+               cmd->base.speed = SPEED_10000;
+               cmd->base.duplex = DUPLEX_FULL;
+               cmd->base.port = PORT_FIBRE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
        }
 
        return 0;
 }
 
-static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+static int xgene_set_link_ksettings(struct net_device *ndev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-       struct phy_device *phydev = pdata->phy_dev;
+       struct phy_device *phydev = ndev->phydev;
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
                if (!phydev)
                        return -ENODEV;
 
-               return phy_ethtool_sset(phydev, cmd);
+               return phy_ethtool_ksettings_set(phydev, cmd);
        }
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
@@ -110,7 +123,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
                        if (!phydev)
                                return -ENODEV;
 
-                       return phy_ethtool_sset(phydev, cmd);
+                       return phy_ethtool_ksettings_set(phydev, cmd);
                }
        }
 
@@ -152,12 +165,12 @@ static void xgene_get_ethtool_stats(struct net_device *ndev,
 
 static const struct ethtool_ops xgene_ethtool_ops = {
        .get_drvinfo = xgene_get_drvinfo,
-       .get_settings = xgene_get_settings,
-       .set_settings = xgene_set_settings,
        .get_link = ethtool_op_get_link,
        .get_strings = xgene_get_strings,
        .get_sset_count = xgene_get_sset_count,
-       .get_ethtool_stats = xgene_get_ethtool_stats
+       .get_ethtool_stats = xgene_get_ethtool_stats,
+       .get_link_ksettings = xgene_get_link_ksettings,
+       .set_link_ksettings = xgene_set_link_ksettings,
 };
 
 void xgene_enet_set_ethtool_ops(struct net_device *ndev)
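A note on the conversion above: the legacy get_settings/set_settings pair and struct ethtool_cmd are superseded by {get,set}_link_ksettings and struct ethtool_link_ksettings, which keep link modes in bitmaps instead of a single u32 mask. A minimal sketch of the fixed-link case, assuming only the in-tree helper ethtool_convert_legacy_u32_to_link_mode() and the standard SUPPORTED_*/SPEED_* constants (the function name is illustrative):

static int sketch_get_link_ksettings(struct net_device *ndev,
                                     struct ethtool_link_ksettings *cmd)
{
        u32 supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
                        SUPPORTED_MII;

        /* legacy u32 masks become link-mode bitmaps */
        ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
                                                supported);
        ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
                                                supported);

        /* speed, duplex, port and autoneg move into cmd->base */
        cmd->base.speed = SPEED_1000;
        cmd->base.duplex = DUPLEX_FULL;
        cmd->base.port = PORT_MII;
        cmd->base.autoneg = AUTONEG_ENABLE;

        return 0;
}

Note that the old transceiver field has no ksettings equivalent; it was deprecated and is simply dropped, as in the hunk above.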
index 321fb19..c481f10 100644 (file)
@@ -713,7 +713,7 @@ static void xgene_enet_adjust_link(struct net_device *ndev)
 {
        struct xgene_enet_pdata *pdata = netdev_priv(ndev);
        const struct xgene_mac_ops *mac_ops = pdata->mac_ops;
-       struct phy_device *phydev = pdata->phy_dev;
+       struct phy_device *phydev = ndev->phydev;
 
        if (phydev->link) {
                if (pdata->phy_speed != phydev->speed) {
@@ -761,10 +761,6 @@ int xgene_enet_phy_connect(struct net_device *ndev)
        if (dev->of_node) {
                for (i = 0 ; i < 2; i++) {
                        np = of_parse_phandle(dev->of_node, "phy-handle", i);
-
-                       if (!np)
-                               continue;
-
                        phy_dev = of_phy_connect(ndev, np,
                                                 &xgene_enet_adjust_link,
                                                 0, pdata->phy_mode);
@@ -777,15 +773,13 @@ int xgene_enet_phy_connect(struct net_device *ndev)
                        netdev_err(ndev, "Could not connect to PHY\n");
                        return -ENODEV;
                }
-
-               pdata->phy_dev = phy_dev;
        } else {
 #ifdef CONFIG_ACPI
                struct acpi_device *adev = acpi_phy_find_device(dev);
                if (adev)
-                       pdata->phy_dev =  adev->driver_data;
-
-               phy_dev = pdata->phy_dev;
+                       phy_dev = adev->driver_data;
+               else
+                       phy_dev = NULL;
 
                if (!phy_dev ||
                    phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link,
@@ -853,8 +847,6 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata,
        if (!phy)
                return -EIO;
 
-       pdata->phy_dev = phy;
-
        return ret;
 }
 
@@ -894,14 +886,18 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata)
 
 void xgene_enet_phy_disconnect(struct xgene_enet_pdata *pdata)
 {
-       if (pdata->phy_dev)
-               phy_disconnect(pdata->phy_dev);
+       struct net_device *ndev = pdata->ndev;
+
+       if (ndev->phydev)
+               phy_disconnect(ndev->phydev);
 }
 
 void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata)
 {
-       if (pdata->phy_dev)
-               phy_disconnect(pdata->phy_dev);
+       struct net_device *ndev = pdata->ndev;
+
+       if (ndev->phydev)
+               phy_disconnect(ndev->phydev);
 
        mdiobus_unregister(pdata->mdio_bus);
        mdiobus_free(pdata->mdio_bus);
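The pdata->phy_dev removal relies on phylib already recording the attached PHY in the net_device itself: phy_connect(), of_phy_connect() and phy_connect_direct() all set ndev->phydev, so a private copy can only go stale. A minimal sketch of the resulting teardown pattern (the wrapper name is illustrative):

static void sketch_phy_teardown(struct net_device *ndev)
{
        /* ndev->phydev is the canonical pointer maintained by phylib */
        if (ndev->phydev)
                phy_disconnect(ndev->phydev);
}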
index b8b9495..522ba92 100644 (file)
@@ -748,8 +748,8 @@ static int xgene_enet_open(struct net_device *ndev)
        if (ret)
                return ret;
 
-       if (pdata->phy_dev) {
-               phy_start(pdata->phy_dev);
+       if (ndev->phydev) {
+               phy_start(ndev->phydev);
        } else {
                schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF);
                netif_carrier_off(ndev);
@@ -772,8 +772,8 @@ static int xgene_enet_close(struct net_device *ndev)
        mac_ops->tx_disable(pdata);
        mac_ops->rx_disable(pdata);
 
-       if (pdata->phy_dev)
-               phy_stop(pdata->phy_dev);
+       if (ndev->phydev)
+               phy_stop(ndev->phydev);
        else
                cancel_delayed_work_sync(&pdata->link_work);
 
index b339fc1..7735371 100644 (file)
@@ -174,7 +174,6 @@ struct xgene_cle_ops {
 struct xgene_enet_pdata {
        struct net_device *ndev;
        struct mii_bus *mdio_bus;
-       struct phy_device *phy_dev;
        int phy_speed;
        struct clk *clk;
        struct platform_device *pdev;
index d672e71..279ee27 100644 (file)
@@ -155,19 +155,24 @@ static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata,
                           rd_addr);
 }
 
-static void xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
+static bool xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
                              u32 rd_addr, u32 *rd_data)
 {
        void __iomem *addr, *rd, *cmd, *cmd_done;
+       bool success;
 
        addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
        rd = pdata->pcs_addr + PCS_READ_REG_OFFSET;
        cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
        cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
 
-       if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data))
+       success = xgene_enet_rd_indirect(addr, rd, cmd, cmd_done,
+                                        rd_addr, rd_data);
+       if (!success)
                netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n",
                           rd_addr);
+
+       return success;
 }
 
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
@@ -208,7 +212,9 @@ static void xgene_pcs_reset(struct xgene_enet_pdata *pdata)
 {
        u32 data;
 
-       xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data);
+       if (!xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data))
+               return;
+
        xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST);
        xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST);
 }
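Returning a bool from xgene_enet_rd_pcs() lets xgene_pcs_reset() bail out when the indirect read did not complete, instead of read-modify-writing an uninitialized value into PCS_CONTROL_1. The guarded pattern, sketched with the names used in this file (the helper itself is illustrative):

static void sketch_pcs_toggle(struct xgene_enet_pdata *pdata, u32 reg, u32 bit)
{
        u32 data;

        /* never write back a value we failed to read */
        if (!xgene_enet_rd_pcs(pdata, reg, &data))
                return;

        xgene_enet_wr_pcs(pdata, reg, data | bit);
        xgene_enet_wr_pcs(pdata, reg, data & ~bit);
}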
index 058460b..a22403c 100644 (file)
@@ -104,7 +104,7 @@ static int arc_mdio_write(struct mii_bus *bus, int phy_addr,
  * @bus: points to the mii_bus structure
  * Description: reset the MII bus
  */
-int arc_mdio_reset(struct mii_bus *bus)
+static int arc_mdio_reset(struct mii_bus *bus)
 {
        struct arc_emac_priv *priv = bus->priv;
        struct arc_emac_mdio_bus_data *data = &priv->bus_data;
index 8fc93c5..6cac919 100644 (file)
@@ -76,11 +76,19 @@ enum alx_device_quirks {
        ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+#define ALX_FLAG_USING_MSIX    BIT(0)
+#define ALX_FLAG_USING_MSI     BIT(1)
+
 struct alx_priv {
        struct net_device *dev;
 
        struct alx_hw hw;
 
+       /* msi-x vectors */
+       int num_vec;
+       struct msix_entry *msix_entries;
+       char irq_lbl[IFNAMSIZ + 8];
+
        /* all descriptor memory */
        struct {
                dma_addr_t dma;
@@ -105,7 +113,7 @@ struct alx_priv {
 
        u16 msg_enable;
 
-       bool msi;
+       int flags;
 
        /* protects hw.stats */
        spinlock_t stats_lock;
index 1fe35e4..6ac40b0 100644 (file)
@@ -1031,6 +1031,19 @@ void alx_configure_basic(struct alx_hw *hw)
        alx_write_mem32(hw, ALX_WRR, val);
 }
 
+void alx_mask_msix(struct alx_hw *hw, int index, bool mask)
+{
+       u32 reg, val;
+
+       reg = ALX_MSIX_ENTRY_BASE + index * PCI_MSIX_ENTRY_SIZE +
+               PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+       val = mask ? PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+
+       alx_write_mem32(hw, reg, val);
+       alx_post_write(hw);
+}
+
 bool alx_get_phy_info(struct alx_hw *hw)
 {
        u16  devs1, devs2;
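alx_mask_msix() above manipulates the device's MSI-X table directly: each table entry is PCI_MSIX_ENTRY_SIZE (16) bytes, the vector-control dword sits at PCI_MSIX_ENTRY_VECTOR_CTRL (12) within an entry, and PCI_MSIX_ENTRY_CTRL_MASKBIT (bit 0) masks the vector. A sketch of the address arithmetic, assuming ALX_MSIX_ENTRY_BASE is the table's offset in BAR space as this driver defines it:

        /* byte offset of vector <index>'s control dword in the MSI-X table */
        u32 reg = ALX_MSIX_ENTRY_BASE +
                  index * PCI_MSIX_ENTRY_SIZE +  /* 16-byte table entries */
                  PCI_MSIX_ENTRY_VECTOR_CTRL;    /* control dword at offset 12 */

        alx_write_mem32(hw, reg, mask ? PCI_MSIX_ENTRY_CTRL_MASKBIT : 0);
        alx_post_write(hw);                      /* flush the posted write */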
index f289c05..0191477 100644 (file)
@@ -562,6 +562,7 @@ int alx_reset_mac(struct alx_hw *hw);
 void alx_set_macaddr(struct alx_hw *hw, const u8 *addr);
 bool alx_phy_configured(struct alx_hw *hw);
 void alx_configure_basic(struct alx_hw *hw);
+void alx_mask_msix(struct alx_hw *hw, int index, bool mask);
 void alx_disable_rss(struct alx_hw *hw);
 bool alx_get_phy_info(struct alx_hw *hw);
 void alx_update_hw_stats(struct alx_hw *hw);
index 6453148..c0f84b7 100644 (file)
@@ -51,6 +51,9 @@
 
 const char alx_drv_name[] = "alx";
 
+static bool msix;
+module_param(msix, bool, 0);
+MODULE_PARM_DESC(msix, "Enable MSI-X interrupt support");
 
 static void alx_free_txbuf(struct alx_priv *alx, int entry)
 {
@@ -292,32 +295,29 @@ static int alx_poll(struct napi_struct *napi, int budget)
        napi_complete(&alx->napi);
 
        /* enable interrupt */
-       spin_lock_irqsave(&alx->irq_lock, flags);
-       alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
-       alx_write_mem32(hw, ALX_IMR, alx->int_mask);
-       spin_unlock_irqrestore(&alx->irq_lock, flags);
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               alx_mask_msix(hw, 1, false);
+       } else {
+               spin_lock_irqsave(&alx->irq_lock, flags);
+               alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+               alx_write_mem32(hw, ALX_IMR, alx->int_mask);
+               spin_unlock_irqrestore(&alx->irq_lock, flags);
+       }
 
        alx_post_write(hw);
 
        return work;
 }
 
-static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
+static bool alx_intr_handle_misc(struct alx_priv *alx, u32 intr)
 {
        struct alx_hw *hw = &alx->hw;
-       bool write_int_mask = false;
-
-       spin_lock(&alx->irq_lock);
-
-       /* ACK interrupt */
-       alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS);
-       intr &= alx->int_mask;
 
        if (intr & ALX_ISR_FATAL) {
                netif_warn(alx, hw, alx->dev,
                           "fatal interrupt 0x%x, resetting\n", intr);
                alx_schedule_reset(alx);
-               goto out;
+               return true;
        }
 
        if (intr & ALX_ISR_ALERT)
@@ -329,19 +329,32 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
                 * is cleared, the interrupt status could be cleared.
                 */
                alx->int_mask &= ~ALX_ISR_PHY;
-               write_int_mask = true;
+               alx_write_mem32(hw, ALX_IMR, alx->int_mask);
                alx_schedule_link_check(alx);
        }
 
+       return false;
+}
+
+static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
+{
+       struct alx_hw *hw = &alx->hw;
+
+       spin_lock(&alx->irq_lock);
+
+       /* ACK interrupt */
+       alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS);
+       intr &= alx->int_mask;
+
+       if (alx_intr_handle_misc(alx, intr))
+               goto out;
+
        if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
                napi_schedule(&alx->napi);
                /* mask rx/tx interrupt, enable them when napi complete */
                alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-               write_int_mask = true;
-       }
-
-       if (write_int_mask)
                alx_write_mem32(hw, ALX_IMR, alx->int_mask);
+       }
 
        alx_write_mem32(hw, ALX_ISR, 0);
 
@@ -350,6 +363,46 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
        return IRQ_HANDLED;
 }
 
+static irqreturn_t alx_intr_msix_ring(int irq, void *data)
+{
+       struct alx_priv *alx = data;
+       struct alx_hw *hw = &alx->hw;
+
+       /* mask interrupt to ACK chip */
+       alx_mask_msix(hw, 1, true);
+       /* clear interrupt status */
+       alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+
+       napi_schedule(&alx->napi);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t alx_intr_msix_misc(int irq, void *data)
+{
+       struct alx_priv *alx = data;
+       struct alx_hw *hw = &alx->hw;
+       u32 intr;
+
+       /* mask interrupt to ACK chip */
+       alx_mask_msix(hw, 0, true);
+
+       /* read interrupt status */
+       intr = alx_read_mem32(hw, ALX_ISR);
+       intr &= (alx->int_mask & ~ALX_ISR_ALL_QUEUES);
+
+       if (alx_intr_handle_misc(alx, intr))
+               return IRQ_HANDLED;
+
+       /* clear interrupt status */
+       alx_write_mem32(hw, ALX_ISR, intr);
+
+       /* enable interrupt again */
+       alx_mask_msix(hw, 0, false);
+
+       return IRQ_HANDLED;
+}
+
 static irqreturn_t alx_intr_msi(int irq, void *data)
 {
        struct alx_priv *alx = data;
@@ -614,31 +667,136 @@ static void alx_free_rings(struct alx_priv *alx)
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
        struct alx_hw *hw = &alx->hw;
+       u32 tbl = 0;
+
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
+               tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+       }
 
-       alx_write_mem32(hw, ALX_MSI_MAP_TBL1, 0);
+       alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
        alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
        alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
+static bool alx_enable_msix(struct alx_priv *alx)
+{
+       int i, err, num_vec = 2;
+
+       alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
+                                   GFP_KERNEL);
+       if (!alx->msix_entries) {
+               netdev_warn(alx->dev, "Allocation of msix entries failed!\n");
+               return false;
+       }
+
+       for (i = 0; i < num_vec; i++)
+               alx->msix_entries[i].entry = i;
+
+       err = pci_enable_msix(alx->hw.pdev, alx->msix_entries, num_vec);
+       if (err) {
+               kfree(alx->msix_entries);
+               netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n");
+               return false;
+       }
+
+       alx->num_vec = num_vec;
+       return true;
+}
+
+static int alx_request_msix(struct alx_priv *alx)
+{
+       struct net_device *netdev = alx->dev;
+       int i, err, vector = 0, free_vector = 0;
+
+       err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
+                         0, netdev->name, alx);
+       if (err)
+               goto out_err;
+
+       vector++;
+       sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
+
+       err = request_irq(alx->msix_entries[vector].vector,
+                         alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+       if (err)
+               goto out_free;
+
+       return 0;
+
+out_free:
+       free_irq(alx->msix_entries[free_vector++].vector, alx);
+
+       vector--;
+       for (i = 0; i < vector; i++)
+               free_irq(alx->msix_entries[free_vector++].vector, alx);
+
+out_err:
+       return err;
+}
+
+static void alx_init_intr(struct alx_priv *alx, bool msix)
+{
+       if (msix) {
+               if (alx_enable_msix(alx))
+                       alx->flags |= ALX_FLAG_USING_MSIX;
+       }
+
+       if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
+               alx->num_vec = 1;
+
+               if (!pci_enable_msi(alx->hw.pdev))
+                       alx->flags |= ALX_FLAG_USING_MSI;
+       }
+}
+
+static void alx_disable_advanced_intr(struct alx_priv *alx)
+{
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               kfree(alx->msix_entries);
+               pci_disable_msix(alx->hw.pdev);
+               alx->flags &= ~ALX_FLAG_USING_MSIX;
+       }
+
+       if (alx->flags & ALX_FLAG_USING_MSI) {
+               pci_disable_msi(alx->hw.pdev);
+               alx->flags &= ~ALX_FLAG_USING_MSI;
+       }
+}
+
 static void alx_irq_enable(struct alx_priv *alx)
 {
        struct alx_hw *hw = &alx->hw;
+       int i;
 
        /* level-1 interrupt switch */
        alx_write_mem32(hw, ALX_ISR, 0);
        alx_write_mem32(hw, ALX_IMR, alx->int_mask);
        alx_post_write(hw);
+
+       if (alx->flags & ALX_FLAG_USING_MSIX)
+               /* enable all msix irqs */
+               for (i = 0; i < alx->num_vec; i++)
+                       alx_mask_msix(hw, i, false);
 }
 
 static void alx_irq_disable(struct alx_priv *alx)
 {
        struct alx_hw *hw = &alx->hw;
+       int i;
 
        alx_write_mem32(hw, ALX_ISR, ALX_ISR_DIS);
        alx_write_mem32(hw, ALX_IMR, 0);
        alx_post_write(hw);
 
-       synchronize_irq(alx->hw.pdev->irq);
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               for (i = 0; i < alx->num_vec; i++) {
+                       alx_mask_msix(hw, i, true);
+                       synchronize_irq(alx->msix_entries[i].vector);
+               }
+       } else {
+               synchronize_irq(alx->hw.pdev->irq);
+       }
 }
 
 static int alx_request_irq(struct alx_priv *alx)
@@ -650,9 +808,18 @@ static int alx_request_irq(struct alx_priv *alx)
 
        msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT;
 
-       if (!pci_enable_msi(alx->hw.pdev)) {
-               alx->msi = true;
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl);
+               err = alx_request_msix(alx);
+               if (!err)
+                       goto out;
+
+               /* msix request failed, realloc resources */
+               alx_disable_advanced_intr(alx);
+               alx_init_intr(alx, false);
+       }
 
+       if (alx->flags & ALX_FLAG_USING_MSI) {
                alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER,
                                msi_ctrl | ALX_MSI_MASK_SEL_LINE);
                err = request_irq(pdev->irq, alx_intr_msi, 0,
@@ -660,6 +827,7 @@ static int alx_request_irq(struct alx_priv *alx)
                if (!err)
                        goto out;
                /* fall back to legacy interrupt */
+               alx->flags &= ~ALX_FLAG_USING_MSI;
                pci_disable_msi(alx->hw.pdev);
        }
 
@@ -669,19 +837,25 @@ static int alx_request_irq(struct alx_priv *alx)
 out:
        if (!err)
                alx_config_vector_mapping(alx);
+       else
+               netdev_err(alx->dev, "IRQ registration failed!\n");
        return err;
 }
 
 static void alx_free_irq(struct alx_priv *alx)
 {
        struct pci_dev *pdev = alx->hw.pdev;
+       int i;
 
-       free_irq(pdev->irq, alx);
-
-       if (alx->msi) {
-               pci_disable_msi(alx->hw.pdev);
-               alx->msi = false;
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               /* we have only 2 vectors without multi queue support */
+               for (i = 0; i < 2; i++)
+                       free_irq(alx->msix_entries[i].vector, alx);
+       } else {
+               free_irq(pdev->irq, alx);
        }
+
+       alx_disable_advanced_intr(alx);
 }
 
 static int alx_identify_hw(struct alx_priv *alx)
@@ -847,12 +1021,14 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 {
        int err;
 
+       alx_init_intr(alx, msix);
+
        if (!resume)
                netif_carrier_off(alx->dev);
 
        err = alx_alloc_rings(alx);
        if (err)
-               return err;
+               goto out_disable_adv_intr;
 
        alx_configure(alx);
 
@@ -873,6 +1049,8 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 
 out_free_rings:
        alx_free_rings(alx);
+out_disable_adv_intr:
+       alx_disable_advanced_intr(alx);
        return err;
 }
 
@@ -993,6 +1171,18 @@ static void alx_reset(struct work_struct *work)
        rtnl_unlock();
 }
 
+static int alx_tpd_req(struct sk_buff *skb)
+{
+       int num;
+
+       num = skb_shinfo(skb)->nr_frags + 1;
+       /* we need one extra descriptor for LSOv2 */
+       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+               num++;
+
+       return num;
+}
+
 static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first)
 {
        u8 cso, css;
@@ -1012,6 +1202,45 @@ static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first)
        return 0;
 }
 
+static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
+{
+       int err;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       err = skb_cow_head(skb, 0);
+       if (err < 0)
+               return err;
+
+       if (skb->protocol == htons(ETH_P_IP)) {
+               struct iphdr *iph = ip_hdr(skb);
+
+               iph->check = 0;
+               tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+                                                        0, IPPROTO_TCP, 0);
+               first->word1 |= 1 << TPD_IPV4_SHIFT;
+       } else if (skb_is_gso_v6(skb)) {
+               ipv6_hdr(skb)->payload_len = 0;
+               tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                      &ipv6_hdr(skb)->daddr,
+                                                      0, IPPROTO_TCP, 0);
+               /* LSOv2: the first TPD only provides the packet length */
+               first->adrl.l.pkt_len = skb->len;
+               first->word1 |= 1 << TPD_LSO_V2_SHIFT;
+       }
+
+       first->word1 |= 1 << TPD_LSO_EN_SHIFT;
+       first->word1 |= (skb_transport_offset(skb) &
+                        TPD_L4HDROFFSET_MASK) << TPD_L4HDROFFSET_SHIFT;
+       first->word1 |= (skb_shinfo(skb)->gso_size &
+                        TPD_MSS_MASK) << TPD_MSS_SHIFT;
+       return 1;
+}
+
 static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 {
        struct alx_tx_queue *txq = &alx->txq;
@@ -1022,6 +1251,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
        first_tpd = &txq->tpd[txq->write_idx];
        tpd = first_tpd;
 
+       if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
+               if (++txq->write_idx == alx->tx_ringsz)
+                       txq->write_idx = 0;
+
+               tpd = &txq->tpd[txq->write_idx];
+               tpd->len = first_tpd->len;
+               tpd->vlan_tag = first_tpd->vlan_tag;
+               tpd->word1 = first_tpd->word1;
+       }
+
        maplen = skb_headlen(skb);
        dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
                             DMA_TO_DEVICE);
@@ -1082,9 +1321,9 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
        struct alx_priv *alx = netdev_priv(netdev);
        struct alx_tx_queue *txq = &alx->txq;
        struct alx_txd *first;
-       int tpdreq = skb_shinfo(skb)->nr_frags + 1;
+       int tso;
 
-       if (alx_tpd_avail(alx) < tpdreq) {
+       if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
                netif_stop_queue(alx->dev);
                goto drop;
        }
@@ -1092,7 +1331,10 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
        first = &txq->tpd[txq->write_idx];
        memset(first, 0, sizeof(*first));
 
-       if (alx_tx_csum(skb, first))
+       tso = alx_tso(skb, first);
+       if (tso < 0)
+               goto drop;
+       else if (!tso && alx_tx_csum(skb, first))
                goto drop;
 
        if (alx_map_tx_skb(alx, skb) < 0)
@@ -1172,7 +1414,10 @@ static void alx_poll_controller(struct net_device *netdev)
 {
        struct alx_priv *alx = netdev_priv(netdev);
 
-       if (alx->msi)
+       if (alx->flags & ALX_FLAG_USING_MSIX) {
+               alx_intr_msix_misc(0, alx);
+               alx_intr_msix_ring(0, alx);
+       } else if (alx->flags & ALX_FLAG_USING_MSI)
                alx_intr_msi(0, alx);
        else
                alx_intr_legacy(0, alx);
@@ -1351,7 +1596,10 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                }
        }
 
-       netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
+       netdev->hw_features = NETIF_F_SG |
+                             NETIF_F_HW_CSUM |
+                             NETIF_F_TSO |
+                             NETIF_F_TSO6;
 
        if (alx_get_perm_macaddr(hw, hw->perm_addr)) {
                dev_warn(&pdev->dev,
@@ -1545,6 +1793,8 @@ static const struct pci_device_id alx_pci_tbl[] = {
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+       { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2500),
+         .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
          .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
        { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
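On the TSO support added above: like most NICs of this class, the hardware expects the driver to seed the TCP checksum with a pseudo-header sum computed over a zero length, letting the chip patch in each segment's real length; for IPv6 ("LSOv2") the total packet length travels in an extra leading descriptor instead. A sketch of the IPv4 seeding step, using only in-tree checksum helpers (the wrapper is illustrative):

static void sketch_tso_seed_ipv4(struct sk_buff *skb)
{
        struct iphdr *iph = ip_hdr(skb);

        iph->check = 0; /* the HW recomputes the IP checksum per segment */
        /* pseudo-header sum with length 0; HW adds per-segment lengths */
        tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                 0, IPPROTO_TCP, 0);
}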
index 0959e68..1fc2d85 100644 (file)
@@ -38,6 +38,7 @@
 #define ALX_DEV_ID_AR8161                              0x1091
 #define ALX_DEV_ID_E2200                               0xe091
 #define ALX_DEV_ID_E2400                               0xe0a1
+#define ALX_DEV_ID_E2500                               0xe0b1
 #define ALX_DEV_ID_AR8162                              0x1090
 #define ALX_DEV_ID_AR8171                              0x10A1
 #define ALX_DEV_ID_AR8172                              0x10A0
index b2d3086..c3354b9 100644 (file)
@@ -58,8 +58,8 @@ BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET);
 static inline void intrl2_##which##_mask_clear(struct bcm_sysport_priv *priv, \
                                                u32 mask)               \
 {                                                                      \
-       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
        priv->irq##which##_mask &= ~(mask);                             \
+       intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);     \
 }                                                                      \
 static inline void intrl2_##which##_mask_set(struct bcm_sysport_priv *priv, \
                                                u32 mask)               \
@@ -1692,7 +1692,7 @@ static int bcm_sysport_stop(struct net_device *dev)
        return 0;
 }
 
-static struct ethtool_ops bcm_sysport_ethtool_ops = {
+static const struct ethtool_ops bcm_sysport_ethtool_ops = {
        .get_drvinfo            = bcm_sysport_get_drvinfo,
        .get_msglevel           = bcm_sysport_get_msglvl,
        .set_msglevel           = bcm_sysport_set_msglvl,
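The reordering in the intrl2 mask-clear macro near the top of this file is about ordering, not arithmetic: the write to INTRL2_CPU_MASK_CLEAR can let the interrupt fire immediately, so the cached priv->irq..._mask must already match hardware state before the register write, or the handler may consult a stale shadow. Sketched for the "0" instance the macro generates:

        /* update the software shadow first ... */
        priv->irq0_mask &= ~mask;
        /* ... then unmask in hardware; the IRQ may fire right after this */
        intrl2_0_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);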
index 9a9745c..c16ec3a 100644 (file)
@@ -92,6 +92,7 @@ MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
 static int bgmac_probe(struct bcma_device *core)
 {
+       struct bcma_chipinfo *ci = &core->bus->chipinfo;
        struct ssb_sprom *sprom = &core->bus->sprom;
        struct mii_bus *mii_bus;
        struct bgmac *bgmac;
@@ -157,9 +158,10 @@ static int bgmac_probe(struct bcma_device *core)
        dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr,
                 bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
 
-       if (!bgmac_is_bcm4707_family(core)) {
+       if (!bgmac_is_bcm4707_family(core) &&
+           !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
                mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
-               if (!IS_ERR(mii_bus)) {
+               if (IS_ERR(mii_bus)) {
                        err = PTR_ERR(mii_bus);
                        goto err;
                }
@@ -230,6 +232,21 @@ static int bgmac_probe(struct bcma_device *core)
                bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
                bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
                break;
+       case BCMA_CHIP_ID_BCM53573:
+               bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+               bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+               if (ci->pkg == BCMA_PKG_ID_BCM47189)
+                       bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+               if (core->core_unit == 0) {
+                       bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE;
+                       if (ci->pkg == BCMA_PKG_ID_BCM47189)
+                               bgmac->feature_flags |=
+                                       BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII;
+               } else if (core->core_unit == 1) {
+                       bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6;
+                       bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII;
+               }
+               break;
        default:
                bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
                bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
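Two separate things happen in the probe hunk above: BCM53573's second GMAC core skips MDIO bus registration, and an inverted error test is fixed. Since bcma_mdio_mii_register() returns an ERR_PTR() on failure, the bailout must trigger when IS_ERR() is true, not false. The canonical pattern, sketched:

        mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
        if (IS_ERR(mii_bus)) {
                err = PTR_ERR(mii_bus); /* errno encoded in the pointer */
                goto err;
        }
        bgmac->mii_bus = mii_bus;       /* valid pointer from here on */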
index c4751ec..6ea0e5f 100644 (file)
@@ -932,7 +932,8 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
                        et_swtype <<= 4;
                        sw_type = et_swtype;
                } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) {
-                       sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
+                       sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RMII |
+                                 BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
                } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) {
                        sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII |
                                  BGMAC_CHIPCTL_1_SW_TYPE_RGMII;
@@ -940,6 +941,27 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
                bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
                                                  BGMAC_CHIPCTL_1_SW_TYPE_MASK),
                                      sw_type);
+       } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE) {
+               u32 sw_type = BGMAC_CHIPCTL_4_IF_TYPE_MII |
+                             BGMAC_CHIPCTL_4_SW_TYPE_EPHY;
+               u8 et_swtype = 0;
+               char buf[4];
+
+               if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) {
+                       if (kstrtou8(buf, 0, &et_swtype))
+                               dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n",
+                                       buf);
+                       sw_type = (et_swtype & 0x0f) << 12;
+               } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII) {
+                       sw_type = BGMAC_CHIPCTL_4_IF_TYPE_RGMII |
+                                 BGMAC_CHIPCTL_4_SW_TYPE_RGMII;
+               }
+               bgmac_cco_ctl_maskset(bgmac, 4, ~(BGMAC_CHIPCTL_4_IF_TYPE_MASK |
+                                                 BGMAC_CHIPCTL_4_SW_TYPE_MASK),
+                                     sw_type);
+       } else if (bgmac->feature_flags & BGMAC_FEAT_CC7_IF_TYPE_RGMII) {
+               bgmac_cco_ctl_maskset(bgmac, 7, ~BGMAC_CHIPCTL_7_IF_TYPE_MASK,
+                                     BGMAC_CHIPCTL_7_IF_TYPE_RGMII);
        }
 
        if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
@@ -1467,6 +1489,10 @@ int bgmac_enet_probe(struct bgmac *info)
         */
        bgmac_clk_enable(bgmac, 0);
 
+       /* This seems to fix the IRQ by assigning OOB #6 to the core */
+       if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
+               bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86);
+
        bgmac_chip_reset(bgmac);
 
        err = bgmac_dma_alloc(bgmac);
index 24a2502..80836b4 100644 (file)
 #define BGMAC_CHIPCTL_1_SW_TYPE_RGMII          0x000000C0
 #define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS         0x00010000
 
+#define BGMAC_CHIPCTL_4_IF_TYPE_MASK           0x00003000
+#define BGMAC_CHIPCTL_4_IF_TYPE_RMII           0x00000000
+#define BGMAC_CHIPCTL_4_IF_TYPE_MII            0x00001000
+#define BGMAC_CHIPCTL_4_IF_TYPE_RGMII          0x00002000
+#define BGMAC_CHIPCTL_4_SW_TYPE_MASK           0x0000C000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHY           0x00000000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYMII                0x00004000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYRMII       0x00008000
+#define BGMAC_CHIPCTL_4_SW_TYPE_RGMII          0x0000C000
+
+#define BGMAC_CHIPCTL_7_IF_TYPE_MASK           0x000000C0
+#define BGMAC_CHIPCTL_7_IF_TYPE_RMII           0x00000000
+#define BGMAC_CHIPCTL_7_IF_TYPE_MII            0x00000040
+#define BGMAC_CHIPCTL_7_IF_TYPE_RGMII          0x00000080
+
 #define BGMAC_WEIGHT   64
 
 #define ETHER_MAX_LEN   1518
 #define BGMAC_FEAT_NO_CLR_MIB          BIT(13)
 #define BGMAC_FEAT_FORCE_SPEED_2500    BIT(14)
 #define BGMAC_FEAT_CMDCFG_SR_REV4      BIT(15)
+#define BGMAC_FEAT_IRQ_ID_OOB_6                BIT(16)
+#define BGMAC_FEAT_CC4_IF_SW_TYPE      BIT(17)
+#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII        BIT(18)
+#define BGMAC_FEAT_CC7_IF_TYPE_RGMII   BIT(19)
 
 struct bgmac_slot_info {
        union {
index 8fc3f3c..ecd357d 100644 (file)
@@ -50,7 +50,7 @@
 #include <linux/log2.h>
 #include <linux/aer.h>
 
-#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+#if IS_ENABLED(CONFIG_CNIC)
 #define BCM_CNIC 1
 #include "cnic_if.h"
 #endif
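IS_ENABLED(CONFIG_CNIC) is the idiomatic replacement for the hand-rolled defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) pair: it evaluates true for both built-in (=y) and modular (=m) options, and unlike #ifdef it also works in ordinary C expressions. A small sketch with a hypothetical CONFIG_FOO:

#if IS_ENABLED(CONFIG_FOO)              /* CONFIG_FOO=y or CONFIG_FOO=m */
#define HAVE_FOO 1
#endif

static inline bool foo_available(void)
{
        return IS_ENABLED(CONFIG_FOO);  /* usable outside the preprocessor */
}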
index 97e8925..dab61a8 100644 (file)
@@ -772,6 +772,11 @@ void bnx2x_fw_dump_lvl(struct bnx2x *bp, const char *lvl)
                (bp->common.bc_ver & 0xff00) >> 8,
                (bp->common.bc_ver & 0xff));
 
+       if (pci_channel_offline(bp->pdev)) {
+               BNX2X_ERR("Cannot dump MCP info while in PCI error\n");
+               return;
+       }
+
        val = REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER);
        if (val == REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER))
                BNX2X_ERR("%s" "MCP PC at 0x%x\n", lvl, val);
@@ -9415,10 +9420,16 @@ unload_error:
        /* Release IRQs */
        bnx2x_free_irq(bp);
 
-       /* Reset the chip */
-       rc = bnx2x_reset_hw(bp, reset_code);
-       if (rc)
-               BNX2X_ERR("HW_RESET failed\n");
+       /* Reset the chip, unless the PCI function is offline. If we reach
+        * this point following PCI error handling, the device is in a bad
+        * state and we are about to remove it, so resetting the chip is
+        * not a good idea.
+        */
+       if (!pci_channel_offline(bp->pdev)) {
+               rc = bnx2x_reset_hw(bp, reset_code);
+               if (rc)
+                       BNX2X_ERR("HW_RESET failed\n");
+       }
 
        /* Report UNLOAD_DONE to MCP */
        bnx2x_send_unload_done(bp, keep_link);
@@ -12560,8 +12571,10 @@ static int bnx2x_init_mcast_macs_list(struct bnx2x *bp,
                kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC);
        struct netdev_hw_addr *ha;
 
-       if (!mc_mac)
+       if (!mc_mac) {
+               BNX2X_ERR("Failed to allocate mc MAC list\n");
                return -ENOMEM;
+       }
 
        INIT_LIST_HEAD(&p->mcast_list);
 
@@ -12632,7 +12645,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp)
                                 BNX2X_UC_LIST_MAC, &ramrod_flags);
 }
 
-static int bnx2x_set_mc_list(struct bnx2x *bp)
+static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 {
        struct net_device *dev = bp->dev;
        struct bnx2x_mcast_ramrod_params rparam = {NULL};
@@ -12650,11 +12663,8 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
        /* then, configure a new MACs list */
        if (netdev_mc_count(dev)) {
                rc = bnx2x_init_mcast_macs_list(bp, &rparam);
-               if (rc) {
-                       BNX2X_ERR("Failed to create multicast MACs list: %d\n",
-                                 rc);
+               if (rc)
                        return rc;
-               }
 
                /* Now add the new MACs */
                rc = bnx2x_config_mcast(bp, &rparam,
@@ -12669,6 +12679,42 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
        return rc;
 }
 
+static int bnx2x_set_mc_list(struct bnx2x *bp)
+{
+       struct bnx2x_mcast_ramrod_params rparam = {NULL};
+       struct net_device *dev = bp->dev;
+       int rc = 0;
+
+       /* On older adapters, we need to flush and re-add filters */
+       if (CHIP_IS_E1x(bp))
+               return bnx2x_set_mc_list_e1x(bp);
+
+       rparam.mcast_obj = &bp->mcast_obj;
+
+       if (netdev_mc_count(dev)) {
+               rc = bnx2x_init_mcast_macs_list(bp, &rparam);
+               if (rc)
+                       return rc;
+
+               /* Override the currently configured set of mc filters */
+               rc = bnx2x_config_mcast(bp, &rparam,
+                                       BNX2X_MCAST_CMD_SET);
+               if (rc < 0)
+                       BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
+                                 rc);
+
+               bnx2x_free_mcast_macs_list(&rparam);
+       } else {
+               /* If no mc addresses are required, flush the configuration */
+               rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
+               if (rc)
+                       BNX2X_ERR("Failed to clear multicast configuration %d\n",
+                                 rc);
+       }
+
+       return rc;
+}
+
 /* If bp->state is OPEN, should be called with netif_addr_lock_bh() */
 static void bnx2x_set_rx_mode(struct net_device *dev)
 {
@@ -13203,13 +13249,22 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
                NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO |
                NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX;
        if (!chip_is_e1x) {
-               dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |
-                                   NETIF_F_GSO_IPXIP4;
+               dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
+                                   NETIF_F_GSO_IPXIP4 |
+                                   NETIF_F_GSO_UDP_TUNNEL |
+                                   NETIF_F_GSO_UDP_TUNNEL_CSUM |
+                                   NETIF_F_GSO_PARTIAL;
+
                dev->hw_enc_features =
                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
                        NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
                        NETIF_F_GSO_IPXIP4 |
-                       NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL;
+                       NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
+                       NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM |
+                       NETIF_F_GSO_PARTIAL;
+
+               dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM |
+                                           NETIF_F_GSO_UDP_TUNNEL_CSUM;
        }
 
        dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
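On the GSO feature hunk above: NETIF_F_GSO_PARTIAL advertises that, for the types listed in dev->gso_partial_features, the device can only finish segmentation after the stack normalizes what the hardware cannot handle (roughly, outer headers and their checksums are made identical across segments; here that covers the tunnel-checksum variants). The registration pattern, sketched:

        dev->hw_features |= NETIF_F_GSO_PARTIAL |
                            NETIF_F_GSO_GRE_CSUM |
                            NETIF_F_GSO_UDP_TUNNEL_CSUM;
        /* types the HW supports only via the partial scheme */
        dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM |
                                    NETIF_F_GSO_UDP_TUNNEL_CSUM;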
index ff702a7..d468380 100644 (file)
@@ -2600,6 +2600,12 @@ struct bnx2x_mcast_mac_elem {
        u8 pad[2]; /* For a natural alignment of the following buffer */
 };
 
+struct bnx2x_mcast_bin_elem {
+       struct list_head link;
+       int bin;
+       int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */
+};
+
 struct bnx2x_pending_mcast_cmd {
        struct list_head link;
        int type; /* BNX2X_MCAST_CMD_X */
@@ -2609,6 +2615,11 @@ struct bnx2x_pending_mcast_cmd {
                int next_bin; /* Needed for RESTORE flow with aprox match */
        } data;
 
+       bool set_convert; /* in case type == BNX2X_MCAST_CMD_SET, this is set
+                          * when macs_head has been converted to a list of
+                          * bnx2x_mcast_bin_elem.
+                          */
+
        bool done; /* set to true, when the command has been handled,
                    * practically used in 57712 handling only, where one pending
                    * command may be handled in a few operations. As long as for
@@ -2636,15 +2647,30 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp,
        struct bnx2x_pending_mcast_cmd *new_cmd;
        struct bnx2x_mcast_mac_elem *cur_mac = NULL;
        struct bnx2x_mcast_list_elem *pos;
-       int macs_list_len = ((cmd == BNX2X_MCAST_CMD_ADD) ?
-                            p->mcast_list_len : 0);
+       int macs_list_len = 0, macs_list_len_size;
+
+       /* When adding MACs we'll need to store their values */
+       if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET)
+               macs_list_len = p->mcast_list_len;
 
        /* If the command is empty ("handle pending commands only"), break */
        if (!p->mcast_list_len)
                return 0;
 
-       total_sz = sizeof(*new_cmd) +
-               macs_list_len * sizeof(struct bnx2x_mcast_mac_elem);
+       /* For a set command, we need to allocate sufficient memory for all
+        * the bins, since we can't analyze at this point how much memory would
+        * be required.
+        */
+       macs_list_len_size = macs_list_len *
+                            sizeof(struct bnx2x_mcast_mac_elem);
+       if (cmd == BNX2X_MCAST_CMD_SET) {
+               int bin_size = BNX2X_MCAST_BINS_NUM *
+                              sizeof(struct bnx2x_mcast_bin_elem);
+
+               if (bin_size > macs_list_len_size)
+                       macs_list_len_size = bin_size;
+       }
+       total_sz = sizeof(*new_cmd) + macs_list_len_size;
 
        /* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */
        new_cmd = kzalloc(total_sz, GFP_ATOMIC);
@@ -2662,6 +2688,7 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp,
 
        switch (cmd) {
        case BNX2X_MCAST_CMD_ADD:
+       case BNX2X_MCAST_CMD_SET:
                cur_mac = (struct bnx2x_mcast_mac_elem *)
                          ((u8 *)new_cmd + sizeof(*new_cmd));
 
@@ -2771,7 +2798,8 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp,
        u8 rx_tx_add_flag = bnx2x_mcast_get_rx_tx_flag(o);
        int bin;
 
-       if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE))
+       if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE) ||
+           (cmd == BNX2X_MCAST_CMD_SET_ADD))
                rx_tx_add_flag |= ETH_MULTICAST_RULES_CMD_IS_ADD;
 
        data->rules[idx].cmd_general_data |= rx_tx_add_flag;
@@ -2797,6 +2825,16 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp,
                bin = cfg_data->bin;
                break;
 
+       case BNX2X_MCAST_CMD_SET_ADD:
+               bin = cfg_data->bin;
+               BIT_VEC64_SET_BIT(o->registry.aprox_match.vec, bin);
+               break;
+
+       case BNX2X_MCAST_CMD_SET_DEL:
+               bin = cfg_data->bin;
+               BIT_VEC64_CLEAR_BIT(o->registry.aprox_match.vec, bin);
+               break;
+
        default:
                BNX2X_ERR("Unknown command: %d\n", cmd);
                return;
@@ -2932,6 +2970,102 @@ static inline void bnx2x_mcast_hdl_pending_restore_e2(struct bnx2x *bp,
                cmd_pos->data.next_bin++;
 }
 
+static void
+bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp,
+                                      struct bnx2x_mcast_obj *o,
+                                      struct bnx2x_pending_mcast_cmd *cmd_pos)
+{
+       u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ];
+       struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n;
+       struct bnx2x_mcast_bin_elem *p_item;
+       int i, cnt = 0, mac_cnt = 0;
+
+       memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ);
+       memcpy(cur, o->registry.aprox_match.vec,
+              sizeof(u64) * BNX2X_MCAST_VEC_SZ);
+
+       /* Fill `req' with the set of bins required by the new MAC list */
+       list_for_each_entry_safe(pmac_pos, pmac_pos_n, &cmd_pos->data.macs_head,
+                                link) {
+               int bin = bnx2x_mcast_bin_from_mac(pmac_pos->mac);
+
+               DP(BNX2X_MSG_SP, "Set contains %pM mcast MAC\n",
+                  pmac_pos->mac);
+
+               BIT_VEC64_SET_BIT(req, bin);
+               list_del(&pmac_pos->link);
+               mac_cnt++;
+       }
+
+       /* We no longer have use for the MACs; re-use the memory for
+        * a list that will be used to configure bins.
+        */
+       cmd_pos->set_convert = true;
+       p_item = (struct bnx2x_mcast_bin_elem *)(cmd_pos + 1);
+       INIT_LIST_HEAD(&cmd_pos->data.macs_head);
+
+       for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) {
+               bool b_current = !!BIT_VEC64_TEST_BIT(cur, i);
+               bool b_required = !!BIT_VEC64_TEST_BIT(req, i);
+
+               if (b_current == b_required)
+                       continue;
+
+               p_item->bin = i;
+               p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD
+                                         : BNX2X_MCAST_CMD_SET_DEL;
+               list_add_tail(&p_item->link, &cmd_pos->data.macs_head);
+               p_item++;
+               cnt++;
+       }
+
+       /* We now definitely know how many commands are hiding here.
+        * Also correct the over-estimate we added earlier to guarantee
+        * that this command would be enqueued.
+        */
+       o->total_pending_num -= (o->max_cmd_len + mac_cnt);
+       o->total_pending_num += cnt;
+
+       DP(BNX2X_MSG_SP, "o->total_pending_num=%d\n", o->total_pending_num);
+}
+
+static void
+bnx2x_mcast_hdl_pending_set_e2(struct bnx2x *bp,
+                              struct bnx2x_mcast_obj *o,
+                              struct bnx2x_pending_mcast_cmd *cmd_pos,
+                              int *cnt)
+{
+       union bnx2x_mcast_config_data cfg_data = {NULL};
+       struct bnx2x_mcast_bin_elem *p_item, *p_item_n;
+
+       /* This is actually a 2-part scheme - it starts by converting the MACs
+        * into a list of bins to be added/removed, and correcting the numbers
+        * on the object. This is allowed now, as we are sure that all
+        * previously configured requests have already been applied.
+        * The second part is actually adding rules for the newly introduced
+        * entries [like all the rest of the hdl_pending functions].
+        */
+       if (!cmd_pos->set_convert)
+               bnx2x_mcast_hdl_pending_set_e2_convert(bp, o, cmd_pos);
+
+       list_for_each_entry_safe(p_item, p_item_n, &cmd_pos->data.macs_head,
+                                link) {
+               cfg_data.bin = (u8)p_item->bin;
+               o->set_one_rule(bp, o, *cnt, &cfg_data, p_item->type);
+               (*cnt)++;
+
+               list_del(&p_item->link);
+
+               /* Break if we reached the maximum number of rules. */
+               if (*cnt >= o->max_cmd_len)
+                       break;
+       }
+
+       /* if no more MACs to configure - we are done */
+       if (list_empty(&cmd_pos->data.macs_head))
+               cmd_pos->done = true;
+}
+
 static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp,
                                struct bnx2x_mcast_ramrod_params *p)
 {
@@ -2955,6 +3089,10 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp,
                                                           &cnt);
                        break;
 
+               case BNX2X_MCAST_CMD_SET:
+                       bnx2x_mcast_hdl_pending_set_e2(bp, o, cmd_pos, &cnt);
+                       break;
+
                default:
                        BNX2X_ERR("Unknown command: %d\n", cmd_pos->type);
                        return -EINVAL;
@@ -3095,6 +3233,19 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp,
                o->set_registry_size(o, reg_sz + p->mcast_list_len);
                break;
 
+       case BNX2X_MCAST_CMD_SET:
+               /* We can only learn how many commands would actually be used
+                * when this is being configured. So for now, simply guarantee
+                * the command will be enqueued [to refrain from adding logic
+                * that handles this and THEN learns it needs several ramrods].
+                * Just like for ADD/Cont, the mcast_list_len might be an
+                * over-estimation; even more so here, since we don't take
+                * into account the possibility of removal of existing bins.
+                */
+               o->set_registry_size(o, reg_sz + p->mcast_list_len);
+               o->total_pending_num += o->max_cmd_len;
+               break;
+
        default:
                BNX2X_ERR("Unknown command: %d\n", cmd);
                return -EINVAL;
@@ -3108,12 +3259,16 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp,
 
 static void bnx2x_mcast_revert_e2(struct bnx2x *bp,
                                      struct bnx2x_mcast_ramrod_params *p,
-                                     int old_num_bins)
+                                 int old_num_bins,
+                                 enum bnx2x_mcast_cmd cmd)
 {
        struct bnx2x_mcast_obj *o = p->mcast_obj;
 
        o->set_registry_size(o, old_num_bins);
        o->total_pending_num -= p->mcast_list_len;
+
+       if (cmd == BNX2X_MCAST_CMD_SET)
+               o->total_pending_num -= o->max_cmd_len;
 }
 
 /**
@@ -3223,9 +3378,11 @@ static int bnx2x_mcast_setup_e2(struct bnx2x *bp,
                bnx2x_mcast_refresh_registry_e2(bp, o);
 
        /* If CLEAR_ONLY was requested - don't send a ramrod and clear
-        * RAMROD_PENDING status immediately.
+        * RAMROD_PENDING status immediately. Due to the SET option, it's also
+        * possible that after evaluating the differences there's no need for
+        * a ramrod. In that case, we can skip it as well.
         */
-       if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) {
+       if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags) || !cnt) {
                raw->clear_pending(raw);
                return 0;
        } else {
@@ -3253,6 +3410,11 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp,
                                    struct bnx2x_mcast_ramrod_params *p,
                                    enum bnx2x_mcast_cmd cmd)
 {
+       if (cmd == BNX2X_MCAST_CMD_SET) {
+               BNX2X_ERR("Can't use `set' command on e1h!\n");
+               return -EINVAL;
+       }
+
        /* Mark, that there is a work to do */
        if ((cmd == BNX2X_MCAST_CMD_DEL) || (cmd == BNX2X_MCAST_CMD_RESTORE))
                p->mcast_list_len = 1;
@@ -3262,7 +3424,8 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp,
 
 static void bnx2x_mcast_revert_e1h(struct bnx2x *bp,
                                       struct bnx2x_mcast_ramrod_params *p,
-                                      int old_num_bins)
+                                      int old_num_bins,
+                                      enum bnx2x_mcast_cmd cmd)
 {
        /* Do nothing */
 }
@@ -3372,6 +3535,11 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp,
        struct bnx2x_mcast_obj *o = p->mcast_obj;
        int reg_sz = o->get_registry_size(o);
 
+       if (cmd == BNX2X_MCAST_CMD_SET) {
+               BNX2X_ERR("Can't use `set' command on e1!\n");
+               return -EINVAL;
+       }
+
        switch (cmd) {
        /* DEL command deletes all currently configured MACs */
        case BNX2X_MCAST_CMD_DEL:
@@ -3422,7 +3590,8 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp,
 
 static void bnx2x_mcast_revert_e1(struct bnx2x *bp,
                                      struct bnx2x_mcast_ramrod_params *p,
-                                     int old_num_macs)
+                                  int old_num_macs,
+                                  enum bnx2x_mcast_cmd cmd)
 {
        struct bnx2x_mcast_obj *o = p->mcast_obj;
 
@@ -3816,7 +3985,7 @@ error_exit2:
        r->clear_pending(r);
 
 error_exit1:
-       o->revert(bp, p, old_reg_size);
+       o->revert(bp, p, old_reg_size, cmd);
 
        return rc;
 }
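The heart of the new BNX2X_MCAST_CMD_SET flow above is a bitmap diff: requested MACs are hashed into approximate-match bins, compared against the bins currently configured, and only bins whose state changes become SET_ADD/SET_DEL rules. A condensed sketch of that comparison (queue_bin_cmd() is an illustrative stand-in for building the bnx2x_mcast_bin_elem list):

        u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ];
        int i;

        /* cur: copied from o->registry.aprox_match.vec;
         * req: one bit per bnx2x_mcast_bin_from_mac() result, as above
         */
        for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) {
                bool is_set = !!BIT_VEC64_TEST_BIT(cur, i);
                bool want_set = !!BIT_VEC64_TEST_BIT(req, i);

                if (is_set == want_set)
                        continue;       /* bin already in the right state */

                queue_bin_cmd(i, want_set ? BNX2X_MCAST_CMD_SET_ADD
                                          : BNX2X_MCAST_CMD_SET_DEL);
        }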
index 4048fc5..0bf2fd4 100644 (file)
@@ -536,6 +536,15 @@ enum bnx2x_mcast_cmd {
        BNX2X_MCAST_CMD_CONT,
        BNX2X_MCAST_CMD_DEL,
        BNX2X_MCAST_CMD_RESTORE,
+
+       /* Following this, the multicast configuration should approximately
+        * equal the set of MACs provided [i.e., remove all else].
+        * The two sub-commands are used internally to decide whether a given
+        * bin is to be added or removed.
+        */
+       BNX2X_MCAST_CMD_SET,
+       BNX2X_MCAST_CMD_SET_ADD,
+       BNX2X_MCAST_CMD_SET_DEL,
 };
 
 struct bnx2x_mcast_obj {
@@ -635,7 +644,8 @@ struct bnx2x_mcast_obj {
         */
        void (*revert)(struct bnx2x *bp,
                       struct bnx2x_mcast_ramrod_params *p,
-                      int old_num_bins);
+                      int old_num_bins,
+                      enum bnx2x_mcast_cmd cmd);
 
        int (*get_registry_size)(struct bnx2x_mcast_obj *o);
        void (*set_registry_size)(struct bnx2x_mcast_obj *o, int n);
index 632daff..6c586b0 100644 (file)
@@ -573,17 +573,6 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf,
                }
        }
 
-       /* clear existing mcasts */
-       mcast.mcast_list_len = vf->mcast_list_len;
-       vf->mcast_list_len = mc_num;
-       rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL);
-       if (rc) {
-               BNX2X_ERR("Failed to remove multicasts\n");
-               kfree(mc);
-               return rc;
-       }
-
-       /* update mcast list on the ramrod params */
        if (mc_num) {
                INIT_LIST_HEAD(&mcast.mcast_list);
                for (i = 0; i < mc_num; i++) {
@@ -594,12 +583,18 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf,
 
                /* add new mcasts */
                mcast.mcast_list_len = mc_num;
-               rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_ADD);
+               rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_SET);
                if (rc)
-                       BNX2X_ERR("Faled to add multicasts\n");
-               kfree(mc);
+                       BNX2X_ERR("Failed to set multicasts\n");
+       } else {
+               /* clear existing mcasts */
+               rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL);
+               if (rc)
+                       BNX2X_ERR("Failed to remove multicasts\n");
        }
 
+       kfree(mc);
+
        return rc;
 }
 
@@ -1583,7 +1578,6 @@ int bnx2x_iov_nic_init(struct bnx2x *bp)
                 *  It needs to be initialized here so that it can be safely
                 *  handled by a subsequent FLR flow.
                 */
-               vf->mcast_list_len = 0;
                bnx2x_init_mcast_obj(bp, &vf->mcast_obj, 0xFF,
                                     0xFF, 0xFF, 0xFF,
                                     bnx2x_vf_sp(bp, vf, mcast_rdata),
index 670a581..7a6d406 100644 (file)
@@ -195,7 +195,6 @@ struct bnx2x_virtf {
        int leading_rss;
 
        /* MCAST object */
-       int mcast_list_len;
        struct bnx2x_mcast_obj          mcast_obj;
 
        /* RSS configuration object */
index 2cf7910..228c964 100644 (file)
@@ -353,8 +353,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
                push_len = (length + sizeof(*tx_push) + 7) / 8;
                if (push_len > 16) {
                        __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
-                       __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
-                                        push_len - 16);
+                       __iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+                                        (push_len - 16) << 1);
                } else {
                        __iowrite64_copy(txr->tx_doorbell, tx_push_buf,
                                         push_len);
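A unit-conversion note on the bnxt hunk above: push_len counts 64-bit words, __iowrite64_copy() takes its count in 64-bit units, and __iowrite32_copy() takes it in 32-bit units, so the tail after the first 16 qwords must be passed as (push_len - 16) << 1 dwords to move the same bytes. The narrower copy also appears to matter because the tail destination (tx_doorbell + 4) is only dword-aligned, and 64-bit MMIO stores to such an address can fault on some architectures. Sketched with illustrative dst/src names:

        size_t qwords = push_len - 16;  /* qwords left after the head copy */

        /* same byte count either way: 8 * qwords == 4 * (qwords << 1) */
        __iowrite32_copy(dst, src, qwords << 1);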
index 8d4f849..46f9043 100644 (file)
@@ -973,7 +973,7 @@ static int bcmgenet_nway_reset(struct net_device *dev)
 }
 
 /* standard ethtool support functions. */
-static struct ethtool_ops bcmgenet_ethtool_ops = {
+static const struct ethtool_ops bcmgenet_ethtool_ops = {
        .get_strings            = bcmgenet_get_strings,
        .get_sset_count         = bcmgenet_get_sset_count,
        .get_ethtool_stats      = bcmgenet_get_ethtool_stats,
index 6592612..a2551bc 100644 (file)
@@ -14012,6 +14012,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
            (!ec->rx_coalesce_usecs) ||
            (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
+           (!ec->tx_coalesce_usecs) ||
            (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
            (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
            (ec->rx_coalesce_usecs_irq > max_rxcoal_tick_int) ||
@@ -14022,16 +14023,6 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
            (ec->stats_block_coalesce_usecs < min_stat_coal_ticks))
                return -EINVAL;
 
-       /* No rx interrupts will be generated if both are zero */
-       if ((ec->rx_coalesce_usecs == 0) &&
-           (ec->rx_max_coalesced_frames == 0))
-               return -EINVAL;
-
-       /* No tx interrupts will be generated if both are zero */
-       if ((ec->tx_coalesce_usecs == 0) &&
-           (ec->tx_max_coalesced_frames == 0))
-               return -EINVAL;
-
        /* Only copy relevant parameters, ignore all others. */
        tp->coal.rx_coalesce_usecs = ec->rx_coalesce_usecs;
        tp->coal.tx_coalesce_usecs = ec->tx_coalesce_usecs;
index dbce938..63144bb 100644 (file)
@@ -1339,6 +1339,24 @@ dma_error:
        return 0;
 }
 
+static inline int macb_clear_csum(struct sk_buff *skb)
+{
+       /* no change for packets without checksum offloading */
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       /* make sure we can modify the header */
+       if (unlikely(skb_cow_head(skb, 0)))
+               return -1;
+
+       /* initialize checksum field
+        * This is required - at least for Zynq, which otherwise calculates
+        * wrong UDP header checksums for UDP packets with UDP data len <=2
+        */
+       *(__sum16 *)(skb_checksum_start(skb) + skb->csum_offset) = 0;
+       return 0;
+}
+
 static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        u16 queue_index = skb_get_queue_mapping(skb);
@@ -1378,6 +1396,11 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_BUSY;
        }
 
+       if (macb_clear_csum(skb)) {
+               dev_kfree_skb_any(skb);
+               goto unlock;
+       }
+
        /* Map socket buffer for DMA transfer */
        if (!macb_tx_map(bp, queue, skb)) {
                dev_kfree_skb_any(skb);
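
macb_clear_csum() only acts on CHECKSUM_PARTIAL skbs, where skb_checksum_start() plus skb->csum_offset locate the checksum field the hardware is expected to fill in; zeroing it first works around the Zynq short-UDP-payload bug noted in the comment above. A worked illustration for UDP:

    /* For a UDP packet, skb_checksum_start() points at the UDP header
     * and skb->csum_offset == offsetof(struct udphdr, check) == 6, so
     * the store zeroes udphdr->check in place before DMA mapping.
     */
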
@@ -2332,7 +2355,8 @@ static void macb_probe_queues(void __iomem *mem,
 }
 
 static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
-                        struct clk **hclk, struct clk **tx_clk)
+                        struct clk **hclk, struct clk **tx_clk,
+                        struct clk **rx_clk)
 {
        int err;
 
@@ -2354,6 +2378,10 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
        if (IS_ERR(*tx_clk))
                *tx_clk = NULL;
 
+       *rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+       if (IS_ERR(*rx_clk))
+               *rx_clk = NULL;
+
        err = clk_prepare_enable(*pclk);
        if (err) {
                dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
@@ -2372,8 +2400,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
                goto err_disable_hclk;
        }
 
+       err = clk_prepare_enable(*rx_clk);
+       if (err) {
+               dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
+               goto err_disable_txclk;
+       }
+
        return 0;
 
+err_disable_txclk:
+       clk_disable_unprepare(*tx_clk);
+
 err_disable_hclk:
        clk_disable_unprepare(*hclk);
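
rx_clk is handled as an optional clock: a failed devm_clk_get() is mapped to NULL, and the clk API treats a NULL clock as a no-op, so the enable/disable calls need no guards on boards that lack the clock. The pattern in isolation, as a hedged sketch (clock name illustrative):

    struct clk *clk = devm_clk_get(&pdev->dev, "some_optional_clk");
    if (IS_ERR(clk))
            clk = NULL;                /* absent clock is not fatal */
    err = clk_prepare_enable(clk);     /* NULL clk: returns 0, no-op */
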
 
@@ -2763,12 +2800,14 @@ static const struct net_device_ops at91ether_netdev_ops = {
 };
 
 static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
-                             struct clk **hclk, struct clk **tx_clk)
+                             struct clk **hclk, struct clk **tx_clk,
+                             struct clk **rx_clk)
 {
        int err;
 
        *hclk = NULL;
        *tx_clk = NULL;
+       *rx_clk = NULL;
 
        *pclk = devm_clk_get(&pdev->dev, "ether_clk");
        if (IS_ERR(*pclk))
@@ -2892,13 +2931,13 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids);
 static int macb_probe(struct platform_device *pdev)
 {
        int (*clk_init)(struct platform_device *, struct clk **,
-                       struct clk **, struct clk **)
+                       struct clk **, struct clk **, struct clk **)
                                              = macb_clk_init;
        int (*init)(struct platform_device *) = macb_init;
        struct device_node *np = pdev->dev.of_node;
        struct device_node *phy_node;
        const struct macb_config *macb_config = NULL;
-       struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
+       struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
        unsigned int queue_mask, num_queues;
        struct macb_platform_data *pdata;
        bool native_io;
@@ -2926,7 +2965,7 @@ static int macb_probe(struct platform_device *pdev)
                }
        }
 
-       err = clk_init(pdev, &pclk, &hclk, &tx_clk);
+       err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk);
        if (err)
                return err;
 
@@ -2962,6 +3001,7 @@ static int macb_probe(struct platform_device *pdev)
        bp->pclk = pclk;
        bp->hclk = hclk;
        bp->tx_clk = tx_clk;
+       bp->rx_clk = rx_clk;
        if (macb_config)
                bp->jumbo_max_len = macb_config->jumbo_max_len;
 
@@ -3060,6 +3100,7 @@ err_disable_clocks:
        clk_disable_unprepare(tx_clk);
        clk_disable_unprepare(hclk);
        clk_disable_unprepare(pclk);
+       clk_disable_unprepare(rx_clk);
 
        return err;
 }
@@ -3086,6 +3127,7 @@ static int macb_remove(struct platform_device *pdev)
                clk_disable_unprepare(bp->tx_clk);
                clk_disable_unprepare(bp->hclk);
                clk_disable_unprepare(bp->pclk);
+               clk_disable_unprepare(bp->rx_clk);
                free_netdev(dev);
        }
 
@@ -3109,6 +3151,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
                clk_disable_unprepare(bp->tx_clk);
                clk_disable_unprepare(bp->hclk);
                clk_disable_unprepare(bp->pclk);
+               clk_disable_unprepare(bp->rx_clk);
        }
 
        return 0;
@@ -3128,6 +3171,7 @@ static int __maybe_unused macb_resume(struct device *dev)
                clk_prepare_enable(bp->pclk);
                clk_prepare_enable(bp->hclk);
                clk_prepare_enable(bp->tx_clk);
+               clk_prepare_enable(bp->rx_clk);
        }
 
        netif_device_attach(netdev);
index aa3aeec..8bed4b5 100644 (file)
@@ -772,7 +772,8 @@ struct macb_config {
        u32                     caps;
        unsigned int            dma_burst_length;
        int     (*clk_init)(struct platform_device *pdev, struct clk **pclk,
-                           struct clk **hclk, struct clk **tx_clk);
+                           struct clk **hclk, struct clk **tx_clk,
+                           struct clk **rx_clk);
        int     (*init)(struct platform_device *pdev);
        int     jumbo_max_len;
 };
@@ -819,6 +820,7 @@ struct macb {
        struct clk              *pclk;
        struct clk              *hclk;
        struct clk              *tx_clk;
+       struct clk              *rx_clk;
        struct net_device       *dev;
        struct napi_struct      napi;
        struct net_device_stats stats;
index e1b78b5..92f411c 100644 (file)
@@ -58,7 +58,7 @@ config LIQUIDIO
        select LIBCRC32C
        ---help---
          This driver supports Cavium LiquidIO Intelligent Server Adapters
-         based on CN66XX and CN68XX chips.
+         based on CN66XX, CN68XX and CN23XX chips.
 
          To compile this driver as a module, choose M here: the module
          will be called liquidio.  This is recommended.
index 2f36680..5a27b2a 100644 (file)
@@ -3,14 +3,16 @@
 #
 obj-$(CONFIG_LIQUIDIO) += liquidio.o
 
-liquidio-objs := lio_main.o  \
-             lio_ethtool.o      \
-             request_manager.o  \
-             response_manager.o \
-             octeon_device.o    \
-             cn66xx_device.o    \
-             cn68xx_device.o    \
-             octeon_mem_ops.o   \
-             octeon_droq.o      \
-             octeon_console.o   \
-             octeon_nic.o
+liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \
+                       lio_core.o         \
+                       request_manager.o  \
+                       response_manager.o \
+                       octeon_device.o    \
+                       cn66xx_device.o    \
+                       cn68xx_device.o    \
+                       cn23xx_pf_device.o \
+                       octeon_mem_ops.o   \
+                       octeon_droq.o      \
+                       octeon_nic.o
+
+liquidio-objs := lio_main.o octeon_console.o $(liquidio-y)
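
The Makefile moves to the kbuild composite-object idiom: objects accumulated in liquidio-$(CONFIG_LIQUIDIO) land in liquidio-y (built-in) or liquidio-m (module), and liquidio-objs is intended to name everything that links into liquidio.o. A reduced sketch of the same idiom for a hypothetical module foo:

    obj-$(CONFIG_FOO) += foo.o
    foo-$(CONFIG_FOO) += foo_core.o foo_ethtool.o
    foo-objs := foo_main.o $(foo-y)
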
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
new file mode 100644 (file)
index 0000000..bddb198
--- /dev/null
@@ -0,0 +1,1237 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "cn23xx_pf_device.h"
+#include "octeon_main.h"
+
+#define RESET_NOTDONE 0
+#define RESET_DONE 1
+
+/* Change the value of SLI Packet Input Jabber Register to allow
+ * VXLAN TSO packets which can be 64424 bytes, exceeding the
+ * MAX_GSO_SIZE we supplied to the kernel
+ */
+#define CN23XX_INPUT_JABBER 64600
+
+#define LIOLUT_RING_DISTRIBUTION 9
+const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = {
+       0, 8, 4, 2, 2, 2, 1, 1, 1
+};
+
+void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct)
+{
+       int i = 0;
+       u32 regval = 0;
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+       /*In cn23xx_soft_reset*/
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%llx\n",
+               "CN23XX_WIN_WR_MASK_REG", CVM_CAST64(CN23XX_WIN_WR_MASK_REG),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_WIN_WR_MASK_REG)));
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_SCRATCH1", CVM_CAST64(CN23XX_SLI_SCRATCH1),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1)));
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_RST_SOFT_RST", CN23XX_RST_SOFT_RST,
+               lio_pci_readq(oct, CN23XX_RST_SOFT_RST));
+
+       /*In cn23xx_set_dpi_regs*/
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_DPI_DMA_CONTROL", CN23XX_DPI_DMA_CONTROL,
+               lio_pci_readq(oct, CN23XX_DPI_DMA_CONTROL));
+
+       for (i = 0; i < 6; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_DPI_DMA_ENG_ENB", i,
+                       CN23XX_DPI_DMA_ENG_ENB(i),
+                       lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_ENB(i)));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_DPI_DMA_ENG_BUF", i,
+                       CN23XX_DPI_DMA_ENG_BUF(i),
+                       lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_BUF(i)));
+       }
+
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", "CN23XX_DPI_CTL",
+               CN23XX_DPI_CTL, lio_pci_readq(oct, CN23XX_DPI_CTL));
+
+       /*In cn23xx_setup_pcie_mps and cn23xx_setup_pcie_mrrs */
+       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_CONFIG_PCIE_DEVCTL",
+               CVM_CAST64(CN23XX_CONFIG_PCIE_DEVCTL), CVM_CAST64(regval));
+
+       dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+               "CN23XX_DPI_SLI_PRTX_CFG", oct->pcie_port,
+               CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port),
+               lio_pci_readq(oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port)));
+
+       /*In cn23xx_specific_regs_setup */
+       dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_S2M_PORTX_CTL", oct->pcie_port,
+               CVM_CAST64(CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)),
+               CVM_CAST64(octeon_read_csr64(
+                       oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port))));
+
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_RING_RST", CVM_CAST64(CN23XX_SLI_PKT_IOQ_RING_RST),
+               (u64)octeon_read_csr64(oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+
+       /*In cn23xx_setup_global_mac_regs*/
+       for (i = 0; i < CN23XX_MAX_MACS; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_PKT_MAC_RINFO64", i,
+                       CVM_CAST64(CN23XX_SLI_PKT_MAC_RINFO64(i, oct->pf_num)),
+                       CVM_CAST64(octeon_read_csr64
+                               (oct, CN23XX_SLI_PKT_MAC_RINFO64
+                                       (i, oct->pf_num))));
+       }
+
+       /*In cn23xx_setup_global_input_regs*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_IQ_PKT_CONTROL64", i,
+                       CVM_CAST64(CN23XX_SLI_IQ_PKT_CONTROL64(i)),
+                       CVM_CAST64(octeon_read_csr64
+                               (oct, CN23XX_SLI_IQ_PKT_CONTROL64(i))));
+       }
+
+       /*In cn23xx_setup_global_output_regs*/
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_OQ_WMARK", CVM_CAST64(CN23XX_SLI_OQ_WMARK),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_OQ_WMARK)));
+
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_PKT_CONTROL", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_PKT_CONTROL(i)),
+                       CVM_CAST64(octeon_read_csr(
+                               oct, CN23XX_SLI_OQ_PKT_CONTROL(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_PKT_INT_LEVELS", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_PKT_INT_LEVELS(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(i))));
+       }
+
+       /*In cn23xx_enable_interrupt and cn23xx_disable_interrupt*/
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "cn23xx->intr_enb_reg64",
+               CVM_CAST64((long)(cn23xx->intr_enb_reg64)),
+               CVM_CAST64(readq(cn23xx->intr_enb_reg64)));
+
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "cn23xx->intr_sum_reg64",
+               CVM_CAST64((long)(cn23xx->intr_sum_reg64)),
+               CVM_CAST64(readq(cn23xx->intr_sum_reg64)));
+
+       /*In cn23xx_setup_iq_regs*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_IQ_BASE_ADDR64", i,
+                       CVM_CAST64(CN23XX_SLI_IQ_BASE_ADDR64(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_IQ_BASE_ADDR64(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_IQ_SIZE", i,
+                       CVM_CAST64(CN23XX_SLI_IQ_SIZE(i)),
+                       CVM_CAST64(octeon_read_csr
+                               (oct, CN23XX_SLI_IQ_SIZE(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_IQ_DOORBELL", i,
+                       CVM_CAST64(CN23XX_SLI_IQ_DOORBELL(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_IQ_DOORBELL(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_IQ_INSTR_COUNT64", i,
+                       CVM_CAST64(CN23XX_SLI_IQ_INSTR_COUNT64(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_IQ_INSTR_COUNT64(i))));
+       }
+
+       /*In cn23xx_setup_oq_regs*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_BASE_ADDR64", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_BASE_ADDR64(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_OQ_BASE_ADDR64(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_SIZE", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_SIZE(i)),
+                       CVM_CAST64(octeon_read_csr
+                               (oct, CN23XX_SLI_OQ_SIZE(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_BUFF_INFO_SIZE", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)),
+                       CVM_CAST64(octeon_read_csr(
+                               oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_PKTS_SENT", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_PKTS_SENT(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_OQ_PKTS_SENT(i))));
+               dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+                       "CN23XX_SLI_OQ_PKTS_CREDIT", i,
+                       CVM_CAST64(CN23XX_SLI_OQ_PKTS_CREDIT(i)),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_SLI_OQ_PKTS_CREDIT(i))));
+       }
+
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_PKT_TIME_INT",
+               CVM_CAST64(CN23XX_SLI_PKT_TIME_INT),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_TIME_INT)));
+       dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+               "CN23XX_SLI_PKT_CNT_INT",
+               CVM_CAST64(CN23XX_SLI_PKT_CNT_INT),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT)));
+}
+
+static int cn23xx_pf_soft_reset(struct octeon_device *oct)
+{
+       octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF);
+
+       dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: BIST enabled for CN23XX soft reset\n",
+               oct->octeon_id);
+
+       octeon_write_csr64(oct, CN23XX_SLI_SCRATCH1, 0x1234ULL);
+
+       /* Initiate chip-wide soft reset */
+       lio_pci_readq(oct, CN23XX_RST_SOFT_RST);
+       lio_pci_writeq(oct, 1, CN23XX_RST_SOFT_RST);
+
+       /* Wait for 100ms as Octeon resets. */
+       mdelay(100);
+
+       if (octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1) == 0x1234ULL) {
+               dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Soft reset failed\n",
+                       oct->octeon_id);
+               return 1;
+       }
+
+       dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Reset completed\n",
+               oct->octeon_id);
+
+       /* restore the reset value */
+       octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF);
+
+       return 0;
+}
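
The SCRATCH1 write is the reset probe: the register holds its value only if the chip did not actually reset, so reading back the sentinel after the reset sequence indicates failure. In outline:

    /* write 0x1234 to SCRATCH1 -> soft reset -> read SCRATCH1
     *   still 0x1234 : chip never reset -> report error
     *   cleared      : reset completed
     */
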
+
+static void cn23xx_enable_error_reporting(struct octeon_device *oct)
+{
+       u32 regval;
+       u32 uncorrectable_err_mask, corrtable_err_status;
+
+       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+       if (regval & CN23XX_CONFIG_PCIE_DEVCTL_MASK) {
+               uncorrectable_err_mask = 0;
+               corrtable_err_status = 0;
+               pci_read_config_dword(oct->pci_dev,
+                                     CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK,
+                                     &uncorrectable_err_mask);
+               pci_read_config_dword(oct->pci_dev,
+                                     CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS,
+                                     &corrtable_err_status);
+               dev_err(&oct->pci_dev->dev, "PCI-E Fatal error detected;\n"
+                                "\tdev_ctl_status_reg = 0x%08x\n"
+                                "\tuncorrectable_error_mask_reg = 0x%08x\n"
+                                "\tcorrectable_error_status_reg = 0x%08x\n",
+                           regval, uncorrectable_err_mask,
+                           corrtable_err_status);
+       }
+
+       regval |= 0xf; /* Enable Link error reporting */
+
+       dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Enabling PCI-E error reporting..\n",
+               oct->octeon_id);
+       pci_write_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, regval);
+}
+
+static u32 cn23xx_coprocessor_clock(struct octeon_device *oct)
+{
+       /* Bits 29:24 of RST_BOOT[PNR_MUL] hold the reference-clock
+        * MULTIPLIER for SLI.
+        */
+
+       /* TBD: get the info in Hand-shake */
+       return (((lio_pci_readq(oct, CN23XX_RST_BOOT) >> 24) & 0x3f) * 50);
+}
+
+u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
+{
+       /* This gives the SLI clock per microsec */
+       u32 oqticks_per_us = cn23xx_coprocessor_clock(oct);
+
+       oct->pfvf_hsword.coproc_tics_per_us = oqticks_per_us;
+
+       /* This gives the clock cycles per millisecond */
+       oqticks_per_us *= 1000;
+
+       /* This gives the oq ticks (1024 core clock cycles) per millisecond */
+       oqticks_per_us /= 1024;
+
+       /* time_intr is in microseconds. The next two steps give the oq
+        * ticks corresponding to time_intr.
+        */
+       oqticks_per_us *= time_intr_in_us;
+       oqticks_per_us /= 1000;
+
+       return oqticks_per_us;
+}
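
A worked trace of the conversion, with an illustrative multiplier (PNR_MUL = 20, i.e. a 1000 MHz SLI clock):

    /* 20 * 50          = 1000 cycles/us
     * 1000 * 1000      = 1000000 cycles/ms
     * 1000000 / 1024   = 976 OQ ticks/ms   (1 tick = 1024 cycles)
     * 976 * 100 / 1000 = 97 ticks for a 100 us interrupt time
     */
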
+
+static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
+{
+       u64 reg_val;
+       u16 mac_no = oct->pcie_port;
+       u16 pf_num = oct->pf_num;
+
+       /* programming SRN and TRS for each MAC(0..3)  */
+
+       dev_dbg(&oct->pci_dev->dev, "%s:Using pcie port %d\n",
+               __func__, mac_no);
+       /* By default, map all 64 IOQs to a single MAC */
+
+       reg_val =
+           octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num));
+
+       if (oct->rev_id == OCTEON_CN23XX_REV_1_1) {
+               /* setting SRN <6:0>  */
+               reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+       } else {
+               /* setting SRN <6:0>  */
+               reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF;
+       }
+
+       /* setting TRS <23:16> */
+       reg_val = reg_val |
+                 (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS);
+       /* write these settings to MAC register */
+       octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num),
+                          reg_val);
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_PKT_MAC(%d)_PF(%d)_RINFO : 0x%016llx\n",
+               mac_no, pf_num, (u64)octeon_read_csr64
+               (oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num)));
+}
+
+static int cn23xx_reset_io_queues(struct octeon_device *oct)
+{
+       int ret_val = 0;
+       u64 d64;
+       u32 q_no, srn, ern;
+       u32 loop = 1000;
+
+       srn = oct->sriov_info.pf_srn;
+       ern = srn + oct->sriov_info.num_pf_rings;
+
+       /* As per the HRM register description, s/w can't write 0 to ENB;
+        * to turn the queue off, the RST bit must be set.
+        */
+
+       /* Reset the Enable bit for all the 64 IQs.  */
+       for (q_no = srn; q_no < ern; q_no++) {
+               /* set RST bit to 1. This bit applies to both IQ and OQ */
+               d64 = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+               d64 = d64 | CN23XX_PKT_INPUT_CTL_RST;
+               octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), d64);
+       }
+
+       /* wait until the RST bit is clear or the RST and quiet bits are set */
+       for (q_no = srn; q_no < ern; q_no++) {
+               u64 reg_val = octeon_read_csr64(oct,
+                                       CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+               while ((READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) &&
+                      !(READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_QUIET) &&
+                      loop--) {
+                       WRITE_ONCE(reg_val, octeon_read_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+               }
+               if (!loop) {
+                       dev_err(&oct->pci_dev->dev,
+                               "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+                               q_no);
+                       return -1;
+               }
+               WRITE_ONCE(reg_val, READ_ONCE(reg_val) &
+                       ~CN23XX_PKT_INPUT_CTL_RST);
+               octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                  READ_ONCE(reg_val));
+
+               WRITE_ONCE(reg_val, octeon_read_csr64(
+                          oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+               if (READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) {
+                       dev_err(&oct->pci_dev->dev,
+                               "clearing the reset failed for qno: %u\n",
+                               q_no);
+                       ret_val = -1;
+               }
+       }
+
+       return ret_val;
+}
+
+static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
+{
+       u32 q_no, ern, srn;
+       u64 pf_num;
+       u64 intr_threshold, reg_val;
+       struct octeon_instr_queue *iq;
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+       pf_num = oct->pf_num;
+
+       srn = oct->sriov_info.pf_srn;
+       ern = srn + oct->sriov_info.num_pf_rings;
+
+       if (cn23xx_reset_io_queues(oct))
+               return -1;
+
+       /* Set the MAC_NUM and PVF_NUM in the IQ_PKT_CONTROL reg for all
+        * queues. Only the PF can set these bits.
+        * Bits 29:30 indicate the MAC num.
+        * Bits 32:47 indicate the PVF num.
+        */
+       for (q_no = 0; q_no < ern; q_no++) {
+               reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
+               reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS;
+
+               octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                  reg_val);
+       }
+
+       /* Select ES, RO, NS, RDSIZE, DPTR Format#0 for PF queues */
+       for (q_no = srn; q_no < ern; q_no++) {
+               void __iomem *inst_cnt_reg;
+
+               iq = oct->instr_queue[q_no];
+               if (iq)
+                       inst_cnt_reg = iq->inst_cnt_reg;
+               else
+                       inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr +
+                                      CN23XX_SLI_IQ_INSTR_COUNT64(q_no);
+
+               reg_val =
+                   octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+
+               reg_val |= CN23XX_PKT_INPUT_CTL_MASK;
+
+               octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                  reg_val);
+
+               /* Set WMARK level for triggering PI_INT */
+               /* intr_threshold = CN23XX_DEF_IQ_INTR_THRESHOLD & */
+               intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) &
+                                CN23XX_PKT_IN_DONE_WMARK_MASK;
+
+               writeq((readq(inst_cnt_reg) &
+                       ~(CN23XX_PKT_IN_DONE_WMARK_MASK <<
+                         CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) |
+                      (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS),
+                      inst_cnt_reg);
+       }
+       return 0;
+}
+
+static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct)
+{
+       u32 reg_val;
+       u32 q_no, ern, srn;
+       u64 time_threshold;
+
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+       srn = oct->sriov_info.pf_srn;
+       ern = srn + oct->sriov_info.num_pf_rings;
+
+       if (CFG_GET_IS_SLI_BP_ON(cn23xx->conf)) {
+               octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 32);
+       } else {
+               /* Set Output queue watermark to 0 to disable backpressure */
+               octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 0);
+       }
+
+       for (q_no = srn; q_no < ern; q_no++) {
+               reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no));
+
+               /* set IPTR & DPTR */
+               reg_val |=
+                   (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR);
+
+               /* reset BMODE */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE);
+
+               /* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+                * for Output Queue ScatterList
+                * reset ROR_P, NSR_P
+                */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P);
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P);
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P);
+#else
+               reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P);
+#endif
+               /* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+                * for Output Queue Data
+                * reset ROR, NSR
+                */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR);
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR);
+               /* set the ES bit */
+               reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES);
+
+               /* write all the selected settings */
+               octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), reg_val);
+
+               /* These interrupts are enabled in the
+                * oct->fn_list.enable_interrupt() routine, which is called
+                * after IOQ init. Set up the interrupt packet and time
+                * thresholds for all the OQs here.
+                */
+               time_threshold = cn23xx_pf_get_oq_ticks(
+                   oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+
+               octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+                                  (CFG_GET_OQ_INTR_PKT(cn23xx->conf) |
+                                   (time_threshold << 32)));
+       }
+
+       /* Set the watermark level for PKO backpressure */
+       writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK);
+
+       /* Disable setting OQs in reset when the ring has no doorbells;
+        * enabling this would cause head-of-line blocking.
+        */
+       /* Do it only for pass1.1. and pass1.2 */
+       if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) ||
+           (oct->rev_id == OCTEON_CN23XX_REV_1_1))
+               writeq(readq((u8 *)oct->mmio[0].hw_addr +
+                                    CN23XX_SLI_GBL_CONTROL) | 0x2,
+                      (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_GBL_CONTROL);
+
+       /** Enable channel-level backpressure */
+       if (oct->pf_num)
+               writeq(0xffffffffffffffffULL,
+                      (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN2_W1S);
+       else
+               writeq(0xffffffffffffffffULL,
+                      (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN_W1S);
+}
+
+static int cn23xx_setup_pf_device_regs(struct octeon_device *oct)
+{
+       cn23xx_enable_error_reporting(oct);
+
+       /* program the MAC(0..3)_RINFO before setting up input/output regs */
+       cn23xx_setup_global_mac_regs(oct);
+
+       if (cn23xx_pf_setup_global_input_regs(oct))
+               return -1;
+
+       cn23xx_pf_setup_global_output_regs(oct);
+
+       /* The default error timeout value should be 0x200000 to avoid a
+        * host hang when an invalid register is read.
+        */
+       octeon_write_csr64(oct, CN23XX_SLI_WINDOW_CTL,
+                          CN23XX_SLI_WINDOW_CTL_DEFAULT);
+
+       /* set SLI_PKT_IN_JABBER to handle large VXLAN packets */
+       octeon_write_csr64(oct, CN23XX_SLI_PKT_IN_JABBER, CN23XX_INPUT_JABBER);
+       return 0;
+}
+
+static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no)
+{
+       struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
+       u64 pkt_in_done;
+
+       iq_no += oct->sriov_info.pf_srn;
+
+       /* Write the start of the input queue's ring and its size  */
+       octeon_write_csr64(oct, CN23XX_SLI_IQ_BASE_ADDR64(iq_no),
+                          iq->base_addr_dma);
+       octeon_write_csr(oct, CN23XX_SLI_IQ_SIZE(iq_no), iq->max_count);
+
+       /* Remember the doorbell & instruction count register addr
+        * for this queue
+        */
+       iq->doorbell_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_DOORBELL(iq_no);
+       iq->inst_cnt_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_INSTR_COUNT64(iq_no);
+       dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n",
+               iq_no, iq->doorbell_reg, iq->inst_cnt_reg);
+
+       /* Store the current instruction counter (used in flush_iq
+        * calculation)
+        */
+       pkt_in_done = readq(iq->inst_cnt_reg);
+
+       if (oct->msix_on) {
+               /* Set CINT_ENB to enable IQ interrupt   */
+               writeq((pkt_in_done | CN23XX_INTR_CINT_ENB),
+                      iq->inst_cnt_reg);
+       } else {
+               /* Clear the count by writing back what we read, but don't
+                * enable interrupts
+                */
+               writeq(pkt_in_done, iq->inst_cnt_reg);
+       }
+
+       iq->reset_instr_cnt = 0;
+}
+
+static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no)
+{
+       u32 reg_val;
+       struct octeon_droq *droq = oct->droq[oq_no];
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       u64 time_threshold;
+       u64 cnt_threshold;
+
+       oq_no += oct->sriov_info.pf_srn;
+
+       octeon_write_csr64(oct, CN23XX_SLI_OQ_BASE_ADDR64(oq_no),
+                          droq->desc_ring_dma);
+       octeon_write_csr(oct, CN23XX_SLI_OQ_SIZE(oq_no), droq->max_count);
+
+       octeon_write_csr(oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq_no),
+                        (droq->buffer_size | (OCT_RH_SIZE << 16)));
+
+       /* Get the mapped address of the pkt_sent and pkts_credit regs */
+       droq->pkts_sent_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_SENT(oq_no);
+       droq->pkts_credit_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_CREDIT(oq_no);
+
+       if (!oct->msix_on) {
+               /* Enable this output queue to generate Packet Timer Interrupt
+                */
+               reg_val =
+                   octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no));
+               reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB;
+               octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no),
+                                reg_val);
+
+               /* Enable this output queue to generate Packet Count Interrupt
+                */
+               reg_val =
+                   octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no));
+               reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB;
+               octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no),
+                                reg_val);
+       } else {
+               time_threshold = cn23xx_pf_get_oq_ticks(
+                   oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+               cnt_threshold = (u32)CFG_GET_OQ_INTR_PKT(cn23xx->conf);
+
+               octeon_write_csr64(
+                   oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(oq_no),
+                   ((time_threshold << 32 | cnt_threshold)));
+       }
+}
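
Both OQ interrupt thresholds share one 64-bit CSR, as the write above shows; the packing, spelled out:

    /* CN23XX_SLI_OQ_PKT_INT_LEVELS layout (per the write above):
     *   bits [63:32] time threshold, in OQ ticks
     *   bits [31:0]  packet-count threshold
     */
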
+
+static int cn23xx_enable_io_queues(struct octeon_device *oct)
+{
+       u64 reg_val;
+       u32 srn, ern, q_no;
+       u32 loop = 1000;
+
+       srn = oct->sriov_info.pf_srn;
+       ern = srn + oct->num_iqs;
+
+       for (q_no = srn; q_no < ern; q_no++) {
+               /* set the corresponding IQ IS_64B bit */
+               if (oct->io_qmask.iq64B & BIT_ULL(q_no - srn)) {
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                       reg_val = reg_val | CN23XX_PKT_INPUT_CTL_IS_64B;
+                       octeon_write_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+               }
+
+               /* set the corresponding IQ ENB bit */
+               if (oct->io_qmask.iq & BIT_ULL(q_no - srn)) {
+                       /* IOQs are in reset by default in PEM2 mode;
+                        * clear the reset bit.
+                        */
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+
+                       if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+                               while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+                                      !(reg_val &
+                                        CN23XX_PKT_INPUT_CTL_QUIET) &&
+                                      loop--) {
+                                       reg_val = octeon_read_csr64(
+                                           oct,
+                                           CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                               }
+                               if (!loop) {
+                                       dev_err(&oct->pci_dev->dev,
+                                               "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+                                               q_no);
+                                       return -1;
+                               }
+                               reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST;
+                               octeon_write_csr64(
+                                   oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                   reg_val);
+
+                               reg_val = octeon_read_csr64(
+                                   oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                               if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+                                       dev_err(&oct->pci_dev->dev,
+                                               "clearing the reset failed for qno: %u\n",
+                                               q_no);
+                                       return -1;
+                               }
+                       }
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                       reg_val = reg_val | CN23XX_PKT_INPUT_CTL_RING_ENB;
+                       octeon_write_csr64(
+                           oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+               }
+       }
+       for (q_no = srn; q_no < ern; q_no++) {
+               u32 reg_val;
+               /* set the corresponding OQ ENB bit */
+               if (oct->io_qmask.oq & BIT_ULL(q_no - srn)) {
+                       reg_val = octeon_read_csr(
+                           oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no));
+                       reg_val = reg_val | CN23XX_PKT_OUTPUT_CTL_RING_ENB;
+                       octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no),
+                                        reg_val);
+               }
+       }
+       return 0;
+}
+
+static void cn23xx_disable_io_queues(struct octeon_device *oct)
+{
+       int q_no, loop;
+       u64 d64;
+       u32 d32;
+       u32 srn, ern;
+
+       srn = oct->sriov_info.pf_srn;
+       ern = srn + oct->num_iqs;
+
+       /*** Disable Input Queues. ***/
+       for (q_no = srn; q_no < ern; q_no++) {
+               loop = HZ;
+
+               /* start the Reset for a particular ring */
+               WRITE_ONCE(d64, octeon_read_csr64(
+                          oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+               WRITE_ONCE(d64, READ_ONCE(d64) &
+                                       (~(CN23XX_PKT_INPUT_CTL_RING_ENB)));
+               WRITE_ONCE(d64, READ_ONCE(d64) | CN23XX_PKT_INPUT_CTL_RST);
+               octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                  READ_ONCE(d64));
+
+               /* Wait until hardware indicates that the particular IQ
+                * is out of reset.
+                */
+               WRITE_ONCE(d64, octeon_read_csr64(
+                                       oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+               while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) {
+                       WRITE_ONCE(d64, octeon_read_csr64(
+                                       oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* Reset the doorbell register for this Input Queue. */
+               octeon_write_csr(oct, CN23XX_SLI_IQ_DOORBELL(q_no), 0xFFFFFFFF);
+               while (octeon_read_csr64(oct, CN23XX_SLI_IQ_DOORBELL(q_no)) &&
+                      loop--) {
+                       schedule_timeout_uninterruptible(1);
+               }
+       }
+
+       /*** Disable Output Queues. ***/
+       for (q_no = srn; q_no < ern; q_no++) {
+               loop = HZ;
+
+               /* Wait until hardware indicates that the particular IQ
+                * is out of reset. Note that SLI_PKT_RING_RST is common
+                * to both IQs and OQs.
+                */
+               WRITE_ONCE(d64, octeon_read_csr64(
+                                       oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+               while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) {
+                       WRITE_ONCE(d64, octeon_read_csr64(
+                                       oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* Reset the doorbell register for this Output Queue. */
+               octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_CREDIT(q_no),
+                                0xFFFFFFFF);
+               while (octeon_read_csr64(oct,
+                                        CN23XX_SLI_OQ_PKTS_CREDIT(q_no)) &&
+                      loop--) {
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* clear the SLI_PKT(0..63)_CNTS[CNT] reg value */
+               WRITE_ONCE(d32, octeon_read_csr(
+                                       oct, CN23XX_SLI_OQ_PKTS_SENT(q_no)));
+               octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_SENT(q_no),
+                                READ_ONCE(d32));
+       }
+}
+
+static u64 cn23xx_pf_msix_interrupt_handler(void *dev)
+{
+       struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+       struct octeon_device *oct = ioq_vector->oct_dev;
+       u64 pkts_sent;
+       u64 ret = 0;
+       struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+
+       dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+
+       if (!droq) {
+               dev_err(&oct->pci_dev->dev, "23XX bringup FIXME: oct pfnum:%d ioq_vector->ioq_num :%d droq is NULL\n",
+                       oct->pf_num, ioq_vector->ioq_num);
+               return 0;
+       }
+
+       pkts_sent = readq(droq->pkts_sent_reg);
+
+       /* Proceed only if our device raised the interrupt. Also check for
+        * all f's, which is what a failed PCI read returns, e.g. when the
+        * interrupt was triggered on an error.
+        */
+       if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL))
+               return ret;
+
+       /* Write count reg in sli_pkt_cnts to clear these int.*/
+       if ((pkts_sent & CN23XX_INTR_PO_INT) ||
+           (pkts_sent & CN23XX_INTR_PI_INT)) {
+               if (pkts_sent & CN23XX_INTR_PO_INT)
+                       ret |= MSIX_PO_INT;
+       }
+
+       if (pkts_sent & CN23XX_INTR_PI_INT)
+               /* We will clear the count when we update the read_index. */
+               ret |= MSIX_PI_INT;
+
+       /* The PF never needs to handle the msix mbox intr here; it arrives
+        * on the last msix vector.
+        */
+       return ret;
+}
+
+static irqreturn_t cn23xx_interrupt_handler(void *dev)
+{
+       struct octeon_device *oct = (struct octeon_device *)dev;
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       u64 intr64;
+
+       dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+       intr64 = readq(cn23xx->intr_sum_reg64);
+
+       oct->int_status = 0;
+
+       if (intr64 & CN23XX_INTR_ERR)
+               dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n",
+                       oct->octeon_id, CVM_CAST64(intr64));
+
+       if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) {
+               if (intr64 & CN23XX_INTR_PKT_DATA)
+                       oct->int_status |= OCT_DEV_INTR_PKT_DATA;
+       }
+
+       if (intr64 & (CN23XX_INTR_DMA0_FORCE))
+               oct->int_status |= OCT_DEV_INTR_DMA0_FORCE;
+       if (intr64 & (CN23XX_INTR_DMA1_FORCE))
+               oct->int_status |= OCT_DEV_INTR_DMA1_FORCE;
+
+       /* Clear the current interrupts */
+       writeq(intr64, cn23xx->intr_sum_reg64);
+
+       return IRQ_HANDLED;
+}
+
+static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
+                                 u32 idx, int valid)
+{
+       u64 bar1;
+       u64 reg_adr;
+
+       if (!valid) {
+               reg_adr = lio_pci_readq(
+                       oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+               WRITE_ONCE(bar1, reg_adr);
+               lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL),
+                              CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+               reg_adr = lio_pci_readq(
+                       oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+               WRITE_ONCE(bar1, reg_adr);
+               return;
+       }
+
+       /*  The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores
+        *  bits <41:22> of the Core Addr
+        */
+       lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK),
+                      CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+
+       WRITE_ONCE(bar1, lio_pci_readq(
+                  oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)));
+}
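
Each BAR1 index entry maps a 4 MB window (bits <41:22> of the core address, i.e. 2^22-byte granularity), and the & 0xFFFFFFFEULL in the invalidate path clears what is evidently the entry's enable bit. A worked example with an illustrative address:

    /* core_addr = 0x100400000ULL:
     *   core_addr >> 22               = 0x401   (bits 41:22)
     *   (0x401 << 4) | PCI_BAR1_MASK  -> ADDR_IDX<23:4> plus mask bits
     */
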
+
+static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask)
+{
+       lio_pci_writeq(oct, mask,
+                      CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx)
+{
+       return (u32)lio_pci_readq(
+           oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+/* always call with lock held */
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+       u32 new_idx;
+       u32 last_done;
+       u32 pkt_in_done = readl(iq->inst_cnt_reg);
+
+       last_done = pkt_in_done - iq->pkt_in_done;
+       iq->pkt_in_done = pkt_in_done;
+
+       /* The sum, taken modulo the IQ size, gives the new read index.
+        * iq->reset_instr_cnt is always zero for cn23xx, so no extra
+        * adjustments are needed.
+        */
+       new_idx = (iq->octeon_read_index +
+                  (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+                 iq->max_count;
+
+       return new_idx;
+}
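
The u32 subtraction is what makes the hardware counter wrap harmless: modular arithmetic yields the correct delta even across the 32-bit boundary. A worked case with illustrative values:

    /* prev pkt_in_done = 0xFFFFFFF0, new reading = 0x00000010:
     *   last_done = 0x00000010 - 0xFFFFFFF0 = 0x20   (u32 wraparound)
     *   new_idx   = (octeon_read_index + 0x20) % max_count
     */
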
+
+static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       u64 intr_val = 0;
+
+       /* Divide the single write into multiple writes based on the flag. */
+       /* Enable Interrupt */
+       if (intr_flag == OCTEON_ALL_INTR) {
+               writeq(cn23xx->intr_mask64, cn23xx->intr_enb_reg64);
+       } else if (intr_flag & OCTEON_OUTPUT_INTR) {
+               intr_val = readq(cn23xx->intr_enb_reg64);
+               intr_val |= CN23XX_INTR_PKT_DATA;
+               writeq(intr_val, cn23xx->intr_enb_reg64);
+       }
+}
+
+static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       u64 intr_val = 0;
+
+       /* Disable Interrupts */
+       if (intr_flag == OCTEON_ALL_INTR) {
+               writeq(0, cn23xx->intr_enb_reg64);
+       } else if (intr_flag & OCTEON_OUTPUT_INTR) {
+               intr_val = readq(cn23xx->intr_enb_reg64);
+               intr_val &= ~CN23XX_INTR_PKT_DATA;
+               writeq(intr_val, cn23xx->intr_enb_reg64);
+       }
+}
+
+static void cn23xx_get_pcie_qlmport(struct octeon_device *oct)
+{
+       oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff;
+
+       dev_dbg(&oct->pci_dev->dev, "OCTEON: CN23xx uses PCIE Port %d\n",
+               oct->pcie_port);
+}
+
+static void cn23xx_get_pf_num(struct octeon_device *oct)
+{
+       u32 fdl_bit = 0;
+
+       /* Read the Function Dependency Link reg to get the function number */
+       pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit);
+       oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) &
+                      CN23XX_PCIE_SRIOV_FDL_MASK);
+}
+
+static void cn23xx_setup_reg_address(struct octeon_device *oct)
+{
+       u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr;
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+       oct->reg_list.pci_win_wr_addr_hi =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_HI);
+       oct->reg_list.pci_win_wr_addr_lo =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_LO);
+       oct->reg_list.pci_win_wr_addr =
+           (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR64);
+
+       oct->reg_list.pci_win_rd_addr_hi =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_HI);
+       oct->reg_list.pci_win_rd_addr_lo =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_LO);
+       oct->reg_list.pci_win_rd_addr =
+           (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR64);
+
+       oct->reg_list.pci_win_wr_data_hi =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_HI);
+       oct->reg_list.pci_win_wr_data_lo =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_LO);
+       oct->reg_list.pci_win_wr_data =
+           (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA64);
+
+       oct->reg_list.pci_win_rd_data_hi =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_HI);
+       oct->reg_list.pci_win_rd_data_lo =
+           (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_LO);
+       oct->reg_list.pci_win_rd_data =
+           (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA64);
+
+       cn23xx_get_pcie_qlmport(oct);
+
+       cn23xx->intr_mask64 = CN23XX_INTR_MASK;
+       if (!oct->msix_on)
+               cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME;
+       if (oct->rev_id >= OCTEON_CN23XX_REV_1_1)
+               cn23xx->intr_mask64 |= CN23XX_INTR_VF_MBOX;
+
+       cn23xx->intr_sum_reg64 =
+           bar0_pciaddr +
+           CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num);
+       cn23xx->intr_enb_reg64 =
+           bar0_pciaddr +
+           CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
+}
+
+static int cn23xx_sriov_config(struct octeon_device *oct)
+{
+       u32 total_rings;
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       /* num_vfs is already filled for us */
+       u32 pf_srn, num_pf_rings;
+
+       cn23xx->conf =
+           (struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
+       switch (oct->rev_id) {
+       case OCTEON_CN23XX_REV_1_0:
+               total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+               break;
+       case OCTEON_CN23XX_REV_1_1:
+               total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+               break;
+       default:
+               total_rings = CN23XX_MAX_RINGS_PER_PF;
+               break;
+       }
+       if (!oct->sriov_info.num_pf_rings) {
+               if (total_rings > num_present_cpus())
+                       num_pf_rings = num_present_cpus();
+               else
+                       num_pf_rings = total_rings;
+       } else {
+               num_pf_rings = oct->sriov_info.num_pf_rings;
+
+               if (num_pf_rings > total_rings) {
+                       dev_warn(&oct->pci_dev->dev,
+                                "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n",
+                                num_pf_rings, total_rings);
+                       num_pf_rings = total_rings;
+               }
+       }
+
+       total_rings = num_pf_rings;
+       /* the first ring of the pf */
+       pf_srn = total_rings - num_pf_rings;
+
+       oct->sriov_info.trs = total_rings;
+       oct->sriov_info.pf_srn = pf_srn;
+       oct->sriov_info.num_pf_rings = num_pf_rings;
+       dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n",
+               oct->sriov_info.trs, oct->sriov_info.pf_srn,
+               oct->sriov_info.num_pf_rings);
+       return 0;
+}
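
Since no VF rings are carved out in this path, total_rings is clamped to num_pf_rings and pf_srn therefore computes to zero, i.e. the PF owns rings [0, num_pf_rings). An illustrative trace:

    /* total_rings = 64, num_present_cpus() = 8, num_pf_rings unset:
     *   num_pf_rings = 8
     *   total_rings  = num_pf_rings = 8
     *   pf_srn       = total_rings - num_pf_rings = 0
     */
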
+
+int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
+{
+       if (octeon_map_pci_barx(oct, 0, 0))
+               return 1;
+
+       if (octeon_map_pci_barx(oct, 1, MAX_BAR1_IOREMAP_SIZE)) {
+               dev_err(&oct->pci_dev->dev, "%s CN23XX BAR1 map failed\n",
+                       __func__);
+               octeon_unmap_pci_barx(oct, 0);
+               return 1;
+       }
+
+       cn23xx_get_pf_num(oct);
+
+       if (cn23xx_sriov_config(oct)) {
+               octeon_unmap_pci_barx(oct, 0);
+               octeon_unmap_pci_barx(oct, 1);
+               return 1;
+       }
+
+       octeon_write_csr64(oct, CN23XX_SLI_MAC_CREDIT_CNT, 0x3F802080802080ULL);
+
+       oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs;
+       oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs;
+       oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler;
+       oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler;
+
+       oct->fn_list.soft_reset = cn23xx_pf_soft_reset;
+       oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs;
+       oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
+
+       oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup;
+       oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write;
+       oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read;
+
+       oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt;
+       oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt;
+
+       oct->fn_list.enable_io_queues = cn23xx_enable_io_queues;
+       oct->fn_list.disable_io_queues = cn23xx_disable_io_queues;
+
+       cn23xx_setup_reg_address(oct);
+
+       oct->coproc_clock_rate = 1000000ULL * cn23xx_coprocessor_clock(oct);
+
+       return 0;
+}
+
+int validate_cn23xx_pf_config_info(struct octeon_device *oct,
+                                  struct octeon_config *conf23xx)
+{
+       if (CFG_GET_IQ_MAX_Q(conf23xx) > CN23XX_MAX_INPUT_QUEUES) {
+               dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n",
+                       __func__, CFG_GET_IQ_MAX_Q(conf23xx),
+                       CN23XX_MAX_INPUT_QUEUES);
+               return 1;
+       }
+
+       if (CFG_GET_OQ_MAX_Q(conf23xx) > CN23XX_MAX_OUTPUT_QUEUES) {
+               dev_err(&oct->pci_dev->dev, "%s: Num OQ (%d) exceeds Max (%d)\n",
+                       __func__, CFG_GET_OQ_MAX_Q(conf23xx),
+                       CN23XX_MAX_OUTPUT_QUEUES);
+               return 1;
+       }
+
+       if (CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_32BYTE_INSTR &&
+           CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_64BYTE_INSTR) {
+               dev_err(&oct->pci_dev->dev, "%s: Invalid instr type for IQ\n",
+                       __func__);
+               return 1;
+       }
+
+       if (!(CFG_GET_OQ_INFO_PTR(conf23xx)) ||
+           !(CFG_GET_OQ_REFILL_THRESHOLD(conf23xx))) {
+               dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n",
+                       __func__);
+               return 1;
+       }
+
+       if (!(CFG_GET_OQ_INTR_TIME(conf23xx))) {
+               dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n",
+                       __func__);
+               return 1;
+       }
+
+       return 0;
+}
+
+void cn23xx_dump_iq_regs(struct octeon_device *oct)
+{
+       u32 regval, q_no;
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
+               CN23XX_SLI_IQ_DOORBELL(0),
+               CVM_CAST64(octeon_read_csr64
+                       (oct, CN23XX_SLI_IQ_DOORBELL(0))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
+               CN23XX_SLI_IQ_BASE_ADDR64(0),
+               CVM_CAST64(octeon_read_csr64
+                       (oct, CN23XX_SLI_IQ_BASE_ADDR64(0))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
+               CN23XX_SLI_IQ_SIZE(0),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_IQ_SIZE(0))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_CTL_STATUS [0x%x]: 0x%016llx\n",
+               CN23XX_SLI_CTL_STATUS,
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_CTL_STATUS)));
+
+       for (q_no = 0; q_no < CN23XX_MAX_INPUT_QUEUES; q_no++) {
+               dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
+                       q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                       CVM_CAST64(octeon_read_csr64
+                               (oct,
+                                       CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
+       }
+
+       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+       dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
+               CN23XX_CONFIG_PCIE_DEVCTL, regval);
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_PRT[%d]_CFG [0x%llx]: 0x%016llx\n",
+               oct->pcie_port, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port),
+               CVM_CAST64(lio_pci_readq(
+                       oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_S2M_PORT[%d]_CTL [0x%x]: 0x%016llx\n",
+               oct->pcie_port, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port),
+               CVM_CAST64(octeon_read_csr64(
+                       oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port))));
+}
+
+int cn23xx_fw_loaded(struct octeon_device *oct)
+{
+       u64 val;
+
+       val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);
+       return (val >> 1) & 1ULL;
+}
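
Since cn23xx_fw_loaded() merely samples bit 1 of SLI_SCRATCH1, a caller that must wait for the firmware can poll it in a bounded loop; a hedged sketch (the 2 s budget and 20 ms step are assumptions, and <linux/delay.h> plus <linux/jiffies.h> are needed):

    /* Illustrative wait-for-firmware loop; timeout policy is assumed. */
    static int example_wait_fw_loaded(struct octeon_device *oct)
    {
            unsigned long deadline = jiffies + msecs_to_jiffies(2000);

            while (!cn23xx_fw_loaded(oct)) {
                    if (time_after(jiffies, deadline))
                            return -ETIMEDOUT;
                    msleep(20);
            }
            return 0;
    }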
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
new file mode 100644 (file)
index 0000000..21b5c90
--- /dev/null
@@ -0,0 +1,59 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+/*! \file  cn23xx_pf_device.h
+ * \brief Host Driver: Routines that perform CN23XX specific operations.
+ */
+
+#ifndef __CN23XX_PF_DEVICE_H__
+#define __CN23XX_PF_DEVICE_H__
+
+#include "cn23xx_pf_regs.h"
+
+/* Register address and configuration for CN23XX devices.
+ * If device-specific changes need to be made, add a struct to hold the
+ * device-specific fields, as shown in the commented section.
+ */
+struct octeon_cn23xx_pf {
+       /** PCI interrupt summary register */
+       u8 __iomem *intr_sum_reg64;
+
+       /** PCI interrupt enable register */
+       u8 __iomem *intr_enb_reg64;
+
+       /** The PCI interrupt mask used by interrupt handler */
+       u64 intr_mask64;
+
+       struct octeon_config *conf;
+};
+
+int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
+
+int validate_cn23xx_pf_config_info(struct octeon_device *oct,
+                                  struct octeon_config *conf23xx);
+
+u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
+
+void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
+
+int cn23xx_fw_loaded(struct octeon_device *oct);
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
new file mode 100644 (file)
index 0000000..03d79d9
--- /dev/null
@@ -0,0 +1,604 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+/*! \file cn23xx_pf_regs.h
+ * \brief Host Driver: Register Address and Register Mask values for
+ * Octeon CN23XX devices.
+ */
+
+#ifndef __CN23XX_PF_REGS_H__
+#define __CN23XX_PF_REGS_H__
+
+#define     CN23XX_CONFIG_VENDOR_ID    0x00
+#define     CN23XX_CONFIG_DEVICE_ID    0x02
+
+#define     CN23XX_CONFIG_XPANSION_BAR             0x38
+
+#define     CN23XX_CONFIG_MSIX_CAP                0x50
+#define     CN23XX_CONFIG_MSIX_LMSI               0x54
+#define     CN23XX_CONFIG_MSIX_UMSI               0x58
+#define     CN23XX_CONFIG_MSIX_MSIMD              0x5C
+#define     CN23XX_CONFIG_MSIX_MSIMM              0x60
+#define     CN23XX_CONFIG_MSIX_MSIMP              0x64
+
+#define     CN23XX_CONFIG_PCIE_CAP                 0x70
+#define     CN23XX_CONFIG_PCIE_DEVCAP              0x74
+#define     CN23XX_CONFIG_PCIE_DEVCTL              0x78
+#define     CN23XX_CONFIG_PCIE_LINKCAP             0x7C
+#define     CN23XX_CONFIG_PCIE_LINKCTL             0x80
+#define     CN23XX_CONFIG_PCIE_SLOTCAP             0x84
+#define     CN23XX_CONFIG_PCIE_SLOTCTL             0x88
+#define     CN23XX_CONFIG_PCIE_DEVCTL2             0x98
+#define     CN23XX_CONFIG_PCIE_LINKCTL2            0xA0
+#define     CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK  0x108
+#define     CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS  0x110
+#define     CN23XX_CONFIG_PCIE_DEVCTL_MASK         0x00040000
+
+#define     CN23XX_PCIE_SRIOV_FDL                 0x188
+#define     CN23XX_PCIE_SRIOV_FDL_BIT_POS         0x10
+#define     CN23XX_PCIE_SRIOV_FDL_MASK            0xFF
+
+#define     CN23XX_CONFIG_PCIE_FLTMSK              0x720
+
+#define     CN23XX_CONFIG_SRIOV_VFDEVID            0x190
+
+#define     CN23XX_CONFIG_SRIOV_BAR_START         0x19C
+#define     CN23XX_CONFIG_SRIOV_BARX(i)                \
+               (CN23XX_CONFIG_SRIOV_BAR_START + (i * 4))
+#define     CN23XX_CONFIG_SRIOV_BAR_PF            0x08
+#define     CN23XX_CONFIG_SRIOV_BAR_64BIT         0x04
+#define     CN23XX_CONFIG_SRIOV_BAR_IO            0x01
+
+/* ##############  BAR0 Registers ################ */
+
+#define    CN23XX_SLI_CTL_PORT_START               0x286E0
+#define    CN23XX_PORT_OFFSET                      0x10
+
+#define    CN23XX_SLI_CTL_PORT(p)                  \
+               (CN23XX_SLI_CTL_PORT_START + ((p) * CN23XX_PORT_OFFSET))
+
+/* 2 scratch registers (64-bit) */
+#define    CN23XX_SLI_WINDOW_CTL                   0x282E0
+#define    CN23XX_SLI_SCRATCH1                     0x283C0
+#define    CN23XX_SLI_SCRATCH2                     0x283D0
+#define    CN23XX_SLI_WINDOW_CTL_DEFAULT           0x200000ULL
+
+/* 1 register (64-bit) - SLI_CTL_STATUS */
+#define    CN23XX_SLI_CTL_STATUS                   0x28570
+
+/* SLI Packet Input Jabber Register (64-bit register)
+ * <31:0> is the byte count limiting the size of packets
+ * allowed as SLI packet inbound packets.
+ * The default value is 0xFA00 (= 64000).
+ */
+#define    CN23XX_SLI_PKT_IN_JABBER                0x29170
+/* The input jabber is used to determine the TSO max size.
+ * Due to a H/W limitation, this needs to be reduced to 60000
+ * in order to do H/W TSO and avoid the WQE malformation
+ * (PKO_BUG_24989_WQE_LEN).
+ */
+#define    CN23XX_DEFAULT_INPUT_JABBER             0xEA60 /*60000*/
+
+#define    CN23XX_WIN_WR_ADDR_LO                   0x20000
+#define    CN23XX_WIN_WR_ADDR_HI                   0x20004
+#define    CN23XX_WIN_WR_ADDR64                    CN23XX_WIN_WR_ADDR_LO
+
+#define    CN23XX_WIN_RD_ADDR_LO                   0x20010
+#define    CN23XX_WIN_RD_ADDR_HI                   0x20014
+#define    CN23XX_WIN_RD_ADDR64                    CN23XX_WIN_RD_ADDR_LO
+
+#define    CN23XX_WIN_WR_DATA_LO                   0x20020
+#define    CN23XX_WIN_WR_DATA_HI                   0x20024
+#define    CN23XX_WIN_WR_DATA64                    CN23XX_WIN_WR_DATA_LO
+
+#define    CN23XX_WIN_RD_DATA_LO                   0x20040
+#define    CN23XX_WIN_RD_DATA_HI                   0x20044
+#define    CN23XX_WIN_RD_DATA64                    CN23XX_WIN_RD_DATA_LO
+
+#define    CN23XX_WIN_WR_MASK_LO                   0x20030
+#define    CN23XX_WIN_WR_MASK_HI                   0x20034
+#define    CN23XX_WIN_WR_MASK_REG                  CN23XX_WIN_WR_MASK_LO
+#define    CN23XX_SLI_MAC_CREDIT_CNT               0x23D70
+
+/* 4 registers (64-bit) for mapping IOQs to MACs(PEMs)-
+ * SLI_PKT_MAC(0..3)_PF(0..1)_RINFO
+ */
+#define    CN23XX_SLI_PKT_MAC_RINFO_START64       0x29030
+
+/* 1 register (64-bit) to determine whether IOQs are in reset. */
+#define    CN23XX_SLI_PKT_IOQ_RING_RST            0x291E0
+
+/* Each Input Queue register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_IQ_OFFSET                       0x20000
+
+#define    CN23XX_MAC_RINFO_OFFSET                0x20
+#define    CN23XX_PF_RINFO_OFFSET                 0x10
+
+#define CN23XX_SLI_PKT_MAC_RINFO64(mac, pf)            \
+               (CN23XX_SLI_PKT_MAC_RINFO_START64 +     \
+                ((mac) * CN23XX_MAC_RINFO_OFFSET) +    \
+                ((pf) * CN23XX_PF_RINFO_OFFSET))
+
+/** mask for total rings, setting TRS to base */
+#define    CN23XX_PKT_MAC_CTL_RINFO_TRS               BIT_ULL(16)
+/** mask for starting ring number: setting SRN <6:0> = 0x7F */
+#define    CN23XX_PKT_MAC_CTL_RINFO_SRN               (0x7F)
+
+/* Starting bit of the TRS field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS     16
+/* Starting bit of SRN field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_SRN_BIT_POS     0
+/* Starting bit of RPVF field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS     32
+/* Starting bit of NVFS field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS     48
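
Given the bit positions above, individual fields can be peeled out of a SLI_PKT_MAC()_PF()_RINFO value with plain shift-and-mask helpers; a hedged sketch (only the 0x7F SRN mask comes from this header; the 8-bit TRS width is an assumption for illustration):

    /* Illustrative field extraction from a 64-bit RINFO value. */
    static inline u32 example_rinfo_srn(u64 rinfo)
    {
            return (u32)(rinfo & CN23XX_PKT_MAC_CTL_RINFO_SRN);
    }

    static inline u32 example_rinfo_trs(u64 rinfo)
    {
            /* the 0xFF width is assumed, not taken from this header */
            return (u32)((rinfo >> CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS) & 0xFF);
    }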
+
+/*###################### REQUEST QUEUE #########################*/
+
+/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */
+#define    CN23XX_SLI_IQ_INSTR_COUNT_START64     0x10040
+
+/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */
+#define    CN23XX_SLI_IQ_BASE_ADDR_START64       0x10010
+
+/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */
+#define    CN23XX_SLI_IQ_DOORBELL_START          0x10020
+
+/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */
+#define    CN23XX_SLI_IQ_SIZE_START              0x10030
+
+/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data &
+ * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL.
+ */
+#define    CN23XX_SLI_IQ_PKT_CONTROL_START64    0x10000
+
+/*------- Request Queue Macros ---------*/
+#define    CN23XX_SLI_IQ_PKT_CONTROL64(iq)          \
+               (CN23XX_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_BASE_ADDR64(iq)          \
+               (CN23XX_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_SIZE(iq)                 \
+               (CN23XX_SLI_IQ_SIZE_START + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_DOORBELL(iq)             \
+               (CN23XX_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_INSTR_COUNT64(iq)          \
+               (CN23XX_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
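Because consecutive input queues sit CN23XX_IQ_OFFSET (0x20000) apart in BAR0, every per-queue access reduces to base + iq * stride; a hedged sketch of ringing the doorbell for queue iq_no (octeon_write_csr() is assumed to be the driver's 32-bit BAR0 write accessor):

    /* Illustrative: tell the HW that "count" new commands were posted on
     * input queue iq_no. CN23XX_SLI_IQ_DOORBELL(iq_no) expands to
     * 0x10020 + iq_no * 0x20000 per the macros above.
     */
    static inline void example_ring_iq_doorbell(struct octeon_device *oct,
                                                u32 iq_no, u32 count)
    {
            octeon_write_csr(oct, CN23XX_SLI_IQ_DOORBELL(iq_no), count);
    }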
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM                  BIT_ULL(32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM                 BIT(29)
+/* Number of instructions to be read in one MAC read request;
+ * set to the max value (4).
+ */
+#define    CN23XX_PKT_INPUT_CTL_RDSIZE                  (3 << 25)
+#define    CN23XX_PKT_INPUT_CTL_IS_64B                  BIT(24)
+#define    CN23XX_PKT_INPUT_CTL_RST                     BIT(23)
+#define    CN23XX_PKT_INPUT_CTL_QUIET                   BIT(28)
+#define    CN23XX_PKT_INPUT_CTL_RING_ENB                BIT(22)
+#define    CN23XX_PKT_INPUT_CTL_DATA_NS                 BIT(8)
+#define    CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        BIT(6)
+#define    CN23XX_PKT_INPUT_CTL_DATA_RO                 BIT(5)
+#define    CN23XX_PKT_INPUT_CTL_USE_CSR                 BIT(4)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_NS               BIT(3)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP      (2)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_RO               (1)
+
+/** Rings per Virtual Function **/
+#define    CN23XX_PKT_INPUT_CTL_RPVF_MASK               (0x3F)
+#define    CN23XX_PKT_INPUT_CTL_RPVF_POS                (48)
+/** These bits [47:45] select the physical function number within the MAC */
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_MASK             (0x7)
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_POS              (45)
+/** These bits [44:32] select the virtual function number within the PF */
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_MASK             (0x1FFF)
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_POS              (32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK            (0x3)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_POS             (29)
+#define    CN23XX_PKT_IN_DONE_WMARK_MASK                (0xFFFFULL)
+#define    CN23XX_PKT_IN_DONE_WMARK_BIT_POS             (32)
+#define    CN23XX_PKT_IN_DONE_CNT_MASK                  (0x00000000FFFFFFFFULL)
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+#define    CN23XX_PKT_INPUT_CTL_MASK                           \
+               (CN23XX_PKT_INPUT_CTL_RDSIZE            |       \
+                CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP  |       \
+                CN23XX_PKT_INPUT_CTL_USE_CSR)
+#else
+#define    CN23XX_PKT_INPUT_CTL_MASK                           \
+               (CN23XX_PKT_INPUT_CTL_RDSIZE            |       \
+                CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP  |       \
+                CN23XX_PKT_INPUT_CTL_USE_CSR           |       \
+                CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP)
+#endif
+
+/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */
+#define    CN23XX_IN_DONE_CNTS_PI_INT               BIT_ULL(62)
+#define    CN23XX_IN_DONE_CNTS_CINT_ENB             BIT_ULL(48)
+
+/*############################ OUTPUT QUEUE #########################*/
+
+/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */
+#define    CN23XX_SLI_OQ_PKT_CONTROL_START       0x10050
+
+/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */
+#define    CN23XX_SLI_OQ0_BUFF_INFO_SIZE         0x10060
+
+/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */
+#define    CN23XX_SLI_OQ_BASE_ADDR_START64       0x10070
+
+/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */
+#define    CN23XX_SLI_OQ_PKT_CREDITS_START       0x10080
+
+/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */
+#define    CN23XX_SLI_OQ_SIZE_START              0x10090
+
+/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */
+#define    CN23XX_SLI_OQ_PKT_SENT_START          0x100B0
+
+/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_START64   0x100A0
+
+/* Each Output Queue register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_OQ_OFFSET                      0x20000
+
+/* 1 (64-bit register) for Output Queue backpressure across all rings. */
+#define    CN23XX_SLI_OQ_WMARK                   0x29180
+
+/* Global pkt control register */
+#define    CN23XX_SLI_GBL_CONTROL                0x29210
+
+/* Backpressure enable register for PF0  */
+#define    CN23XX_SLI_OUT_BP_EN_W1S              0x29260
+
+/* Backpressure enable register for PF1  */
+#define    CN23XX_SLI_OUT_BP_EN2_W1S             0x29270
+
+/* Backpressure disable register for PF0  */
+#define    CN23XX_SLI_OUT_BP_EN_W1C              0x29280
+
+/* Backpressure disable register for PF1  */
+#define    CN23XX_SLI_OUT_BP_EN2_W1C             0x29290
+
+/*------- Output Queue Macros ---------*/
+
+#define    CN23XX_SLI_OQ_PKT_CONTROL(oq)          \
+               (CN23XX_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_BASE_ADDR64(oq)          \
+               (CN23XX_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_SIZE(oq)                 \
+               (CN23XX_SLI_OQ_SIZE_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq)                 \
+               (CN23XX_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKTS_SENT(oq)            \
+               (CN23XX_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKTS_CREDIT(oq)          \
+               (CN23XX_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS(oq)            \
+               (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \
+                ((oq) * CN23XX_OQ_OFFSET))
+
+/* Macros for accessing CNT and TIME separately from INT_LEVELS */
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_CNT(oq)                \
+               (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \
+                ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_TIME(oq)       \
+               (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \
+                ((oq) * CN23XX_OQ_OFFSET) + 4)
+
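The +4 in the _TIME variant reflects that the 64-bit INT_LEVELS register keeps the packet-count threshold in its low word and the time threshold in the word above it; a hedged sketch of programming the two halves separately (the time value is in hardware ticks, so a microseconds-to-ticks conversion such as cn23xx_pf_get_oq_ticks() would be applied first; 32-bit CSR writes are assumed):

    /* Illustrative: set both interrupt thresholds for output queue oq. */
    static inline void example_set_oq_int_levels(struct octeon_device *oct,
                                                 u32 oq, u32 pkt_cnt,
                                                 u32 time_ticks)
    {
            octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS_CNT(oq), pkt_cnt);
            octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS_TIME(oq), time_ticks);
    }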
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_OUTPUT_CTL_TENB                  BIT(13)
+#define    CN23XX_PKT_OUTPUT_CTL_CENB                  BIT(12)
+#define    CN23XX_PKT_OUTPUT_CTL_IPTR                  BIT(11)
+#define    CN23XX_PKT_OUTPUT_CTL_ES                    BIT(9)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR                   BIT(8)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR                   BIT(7)
+#define    CN23XX_PKT_OUTPUT_CTL_DPTR                  BIT(6)
+#define    CN23XX_PKT_OUTPUT_CTL_BMODE                 BIT(5)
+#define    CN23XX_PKT_OUTPUT_CTL_ES_P                  BIT(3)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR_P                 BIT(2)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR_P                 BIT(1)
+#define    CN23XX_PKT_OUTPUT_CTL_RING_ENB              BIT(0)
+
+/*######################### Mailbox Reg Macros ########################*/
+#define    CN23XX_SLI_PKT_MBOX_INT_START             0x10210
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START       0x10200
+#define    CN23XX_SLI_MAC_PF_MBOX_INT_START          0x27380
+
+#define    CN23XX_SLI_MBOX_OFFSET                   0x20000
+#define    CN23XX_SLI_MBOX_SIG_IDX_OFFSET           0x8
+
+#define    CN23XX_SLI_PKT_MBOX_INT(q)          \
+               (CN23XX_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET))
+
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx)               \
+               (CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START +          \
+                ((q) * CN23XX_SLI_MBOX_OFFSET +                \
+                 (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET))
+
+#define    CN23XX_SLI_MAC_PF_MBOX_INT(mac, pf)         \
+               (CN23XX_SLI_MAC_PF_MBOX_INT_START +     \
+                ((mac) * CN23XX_MAC_INT_OFFSET +       \
+                 (pf) * CN23XX_PF_INT_OFFSET))
+
+/*######################### DMA Counters #########################*/
+
+/* 2 registers (64-bit) - DMA Count - 1 for each DMA counter 0/1. */
+#define    CN23XX_DMA_CNT_START                   0x28400
+
+/* 2 registers (64-bit) - DMA Timer 0/1, contains DMA timer values */
+/* SLI_DMA_0_TIM */
+#define    CN23XX_DMA_TIM_START                   0x28420
+
+/* 2 registers (64-bit) - DMA count & Time Interrupt threshold -
+ * SLI_DMA_0_INT_LEVEL
+ */
+#define    CN23XX_DMA_INT_LEVEL_START             0x283E0
+
+/* Each DMA register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_DMA_OFFSET                      0x10
+
+/*---------- DMA Counter Macros ---------*/
+#define    CN23XX_DMA_CNT(dq)                      \
+               (CN23XX_DMA_CNT_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_INT_LEVEL(dq)                \
+               (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_PKT_INT_LEVEL(dq)            \
+               (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_TIME_INT_LEVEL(dq)           \
+               (CN23XX_DMA_INT_LEVEL_START + 4 + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_TIM(dq)                     \
+               (CN23XX_DMA_TIM_START + ((dq) * CN23XX_DMA_OFFSET))
+
+/*######################## MSIX TABLE #########################*/
+
+#define        CN23XX_MSIX_TABLE_ADDR_START            0x0
+#define        CN23XX_MSIX_TABLE_DATA_START            0x8
+
+#define        CN23XX_MSIX_TABLE_SIZE                  0x10
+#define        CN23XX_MSIX_TABLE_ENTRIES               0x41
+
+#define CN23XX_MSIX_ENTRY_VECTOR_CTL   BIT_ULL(32)
+
+#define        CN23XX_MSIX_TABLE_ADDR(idx)             \
+       (CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#define        CN23XX_MSIX_TABLE_DATA(idx)             \
+       (CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
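Each of the 0x41 (65) vectors occupies a 0x10-byte entry, so the address/data words of a vector sit at fixed strides; a short worked example of the two macros:

    /* Worked example: for MSI-X vector 3,
     *   CN23XX_MSIX_TABLE_ADDR(3) == 0x0 + 3 * 0x10 == 0x30
     *   CN23XX_MSIX_TABLE_DATA(3) == 0x8 + 3 * 0x10 == 0x38
     */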
+/*######################## INTERRUPTS #########################*/
+#define CN23XX_MAC_INT_OFFSET   0x20
+#define CN23XX_PF_INT_OFFSET    0x10
+
+/* 1 register (64-bit) for Interrupt Summary */
+#define    CN23XX_SLI_INT_SUM64            0x27000
+
+/* 4 registers (64-bit) for Interrupt Enable for each Port */
+#define    CN23XX_SLI_INT_ENB64            0x27080
+
+#define    CN23XX_SLI_MAC_PF_INT_SUM64(mac, pf)                        \
+               (CN23XX_SLI_INT_SUM64 +                         \
+                ((mac) * CN23XX_MAC_INT_OFFSET) +              \
+                ((pf) * CN23XX_PF_INT_OFFSET))
+
+#define    CN23XX_SLI_MAC_PF_INT_ENB64(mac, pf)                \
+               (CN23XX_SLI_INT_ENB64 +                 \
+                ((mac) * CN23XX_MAC_INT_OFFSET) +      \
+                ((pf) * CN23XX_PF_INT_OFFSET))
+
+/* 1 register (64-bit) to indicate which Output Queue reached pkt threshold */
+#define    CN23XX_SLI_PKT_CNT_INT                0x29130
+
+/* 1 register (64-bit) to indicate which Output Queue reached time threshold */
+#define    CN23XX_SLI_PKT_TIME_INT               0x29140
+
+/*------------------ Interrupt Masks ----------------*/
+
+#define    CN23XX_INTR_PO_INT                  BIT_ULL(63)
+#define    CN23XX_INTR_PI_INT                  BIT_ULL(62)
+#define    CN23XX_INTR_MBOX_INT                        BIT_ULL(61)
+#define    CN23XX_INTR_RESEND                  BIT_ULL(60)
+
+#define    CN23XX_INTR_CINT_ENB                 BIT_ULL(48)
+#define    CN23XX_INTR_MBOX_ENB                 BIT(0)
+
+#define    CN23XX_INTR_RML_TIMEOUT_ERR           (1)
+
+#define    CN23XX_INTR_MIO_INT                   BIT(1)
+
+#define    CN23XX_INTR_RESERVED1                 (3 << 2)
+
+#define    CN23XX_INTR_PKT_COUNT                 BIT(4)
+#define    CN23XX_INTR_PKT_TIME                  BIT(5)
+
+#define    CN23XX_INTR_RESERVED2                 (3 << 6)
+
+#define    CN23XX_INTR_M0UPB0_ERR                BIT(8)
+#define    CN23XX_INTR_M0UPWI_ERR                BIT(9)
+#define    CN23XX_INTR_M0UNB0_ERR                BIT(10)
+#define    CN23XX_INTR_M0UNWI_ERR                BIT(11)
+
+#define    CN23XX_INTR_RESERVED3                 (0xFFFFFULL << 12)
+
+#define    CN23XX_INTR_DMA0_FORCE                BIT_ULL(32)
+#define    CN23XX_INTR_DMA1_FORCE                BIT_ULL(33)
+
+#define    CN23XX_INTR_DMA0_COUNT                BIT_ULL(34)
+#define    CN23XX_INTR_DMA1_COUNT                BIT_ULL(35)
+
+#define    CN23XX_INTR_DMA0_TIME                 BIT_ULL(36)
+#define    CN23XX_INTR_DMA1_TIME                 BIT_ULL(37)
+
+#define    CN23XX_INTR_RESERVED4                 (0x7FFFFULL << 38)
+
+#define    CN23XX_INTR_VF_MBOX                   BIT_ULL(57)
+#define    CN23XX_INTR_DMAVF_ERR                 BIT_ULL(58)
+#define    CN23XX_INTR_DMAPF_ERR                 BIT_ULL(59)
+
+#define    CN23XX_INTR_PKTVF_ERR                 BIT_ULL(60)
+#define    CN23XX_INTR_PKTPF_ERR                 BIT_ULL(61)
+#define    CN23XX_INTR_PPVF_ERR                  BIT_ULL(62)
+#define    CN23XX_INTR_PPPF_ERR                  BIT_ULL(63)
+
+#define    CN23XX_INTR_DMA0_DATA                 (CN23XX_INTR_DMA0_TIME)
+#define    CN23XX_INTR_DMA1_DATA                 (CN23XX_INTR_DMA1_TIME)
+
+#define    CN23XX_INTR_DMA_DATA                  \
+               (CN23XX_INTR_DMA0_DATA | CN23XX_INTR_DMA1_DATA)
+
+/* By default only TIME based */
+#define    CN23XX_INTR_PKT_DATA                  (CN23XX_INTR_PKT_TIME)
+/* For both COUNT and TIME based */
+/* #define    CN23XX_INTR_PKT_DATA                  \
+ * (CN23XX_INTR_PKT_COUNT | CN23XX_INTR_PKT_TIME)
+ */
+
+/* Sum of interrupts for all PCI-Express Data Interrupts */
+#define    CN23XX_INTR_PCIE_DATA                 \
+               (CN23XX_INTR_DMA_DATA | CN23XX_INTR_PKT_DATA)
+
+/* Sum of interrupts for error events */
+#define    CN23XX_INTR_ERR                     \
+               (CN23XX_INTR_M0UPB0_ERR |       \
+                CN23XX_INTR_M0UPWI_ERR |       \
+                CN23XX_INTR_M0UNB0_ERR |       \
+                CN23XX_INTR_M0UNWI_ERR |       \
+                CN23XX_INTR_DMAVF_ERR  |       \
+                CN23XX_INTR_DMAPF_ERR  |       \
+                CN23XX_INTR_PKTPF_ERR  |       \
+                CN23XX_INTR_PPPF_ERR   |       \
+                CN23XX_INTR_PPVF_ERR)
+
+/* Programmed Mask for Interrupt Sum */
+#define    CN23XX_INTR_MASK                    \
+               (CN23XX_INTR_DMA_DATA   |       \
+                CN23XX_INTR_DMA0_FORCE |       \
+                CN23XX_INTR_DMA1_FORCE |       \
+                CN23XX_INTR_MIO_INT    |       \
+                CN23XX_INTR_ERR)
+
+/* 4 registers (64-bit) */
+#define    CN23XX_SLI_S2M_PORT_CTL_START         0x23D80
+#define    CN23XX_SLI_S2M_PORTX_CTL(port)      \
+               (CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10))
+
+#define    CN23XX_SLI_MAC_NUMBER                 0x20050
+
+/** PEM(0..3)_BAR1_INDEX(0..15) address is defined as
+ *  addr = 0x00011800C0000100 | (port << 24) | (idx << 3)
+ *  where port is PEM(0..3) and idx is INDEX(0..15).
+ */
+#define    CN23XX_PEM_BAR1_INDEX_START             0x00011800C0000100ULL
+#define    CN23XX_PEM_OFFSET                       24
+#define    CN23XX_BAR1_INDEX_OFFSET                3
+
+#define    CN23XX_PEM_BAR1_INDEX_REG(port, idx)                \
+               (CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \
+                ((idx) << CN23XX_BAR1_INDEX_OFFSET))
+
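To make the encoding concrete, a worked example of the formula:

    /* port = 1, idx = 2 gives
     *   0x00011800C0000100 | (1 << 24) | (2 << 3) == 0x00011800C1000110,
     * which is exactly what CN23XX_PEM_BAR1_INDEX_REG(1, 2) expands to.
     */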
+/*############################ DPI #########################*/
+
+/* 1 register (64-bit) - provides DMA Enable */
+#define    CN23XX_DPI_CTL                 0x0001df0000000040ULL
+
+/* 1 register (64-bit) - Controls the DMA IO Operation */
+#define    CN23XX_DPI_DMA_CONTROL         0x0001df0000000048ULL
+
+/* 1 register (64-bit) - Provides DMA Instr'n Queue Enable  */
+#define    CN23XX_DPI_REQ_GBL_ENB         0x0001df0000000050ULL
+
+/* 1 register (64-bit) - DPI_REQ_ERR_RSP
+ * Indicates which Instr'n Queue received error response from the IO sub-system
+ */
+#define    CN23XX_DPI_REQ_ERR_RSP         0x0001df0000000058ULL
+
+/* 1 register (64-bit) - DPI_REQ_ERR_RST
+ * Indicates which Instr'n Queue dropped an Instr'n
+ */
+#define    CN23XX_DPI_REQ_ERR_RST         0x0001df0000000060ULL
+
+/* 6 registers (64-bit) - DPI_DMA_ENG(0..5)_EN
+ * Provides DMA Engine Queue Enable
+ */
+#define    CN23XX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
+#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8))
+
+/* 8 registers (64-bit) - DPI_DMA(0..7)_REQQ_CTL
+ * Provides control bits for transactions on 8 Queues
+ */
+#define    CN23XX_DPI_DMA_REQQ0_CTL       0x0001df0000000180ULL
+#define    CN23XX_DPI_DMA_REQQ_CTL(q_no)       \
+               (CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8))
+
+/* 6 registers (64-bit) - DPI_ENG(0..5)_BUF
+ * Provides DMA Engine FIFO (Queue) Size
+ */
+#define    CN23XX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
+#define    CN23XX_DPI_DMA_ENG_BUF(eng)   \
+               (CN23XX_DPI_DMA_ENG0_BUF + (eng * 8))
+
+/* 4 Registers (64-bit) */
+#define    CN23XX_DPI_SLI_PRT_CFG_START   0x0001df0000000900ULL
+#define    CN23XX_DPI_SLI_PRTX_CFG(port)        \
+               (CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8))
+
+/* Masks for DPI_DMA_CONTROL Register */
+#define    CN23XX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
+#define    CN23XX_DPI_DMA_PKT_EN          BIT_ULL(56)
+#define    CN23XX_DPI_DMA_ENB             (0x0FULL << 48)
+/* Set in DMA Control to update the packet count, not the byte count,
+ * sent by DMA when Interrupt Coalescing (CA mode) is used.
+ */
+#define    CN23XX_DPI_DMA_O_ADD1          BIT(19)
+/* selecting 64-bit Byte Swap Mode */
+#define    CN23XX_DPI_DMA_O_ES            BIT(15)
+#define    CN23XX_DPI_DMA_O_MODE          BIT(14)
+
+#define    CN23XX_DPI_DMA_CTL_MASK                     \
+               (CN23XX_DPI_DMA_COMMIT_MODE     |       \
+                CN23XX_DPI_DMA_PKT_EN          |       \
+                CN23XX_DPI_DMA_O_ES            |       \
+                CN23XX_DPI_DMA_O_MODE)
+
+/*############################ RST #########################*/
+
+#define    CN23XX_RST_BOOT            0x0001180006001600ULL
+#define    CN23XX_RST_SOFT_RST        0x0001180006001680ULL
+
+#define    CN23XX_LMC0_RESET_CTL               0x0001180088000180ULL
+#define    CN23XX_LMC0_RESET_CTL_DDR3RST_MASK  0x0000000000000001ULL
+
+#endif
index c03d370..e779af8 100644 (file)
@@ -338,7 +338,7 @@ void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no)
        octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, intr);
 }
 
-void lio_cn6xxx_enable_io_queues(struct octeon_device *oct)
+int lio_cn6xxx_enable_io_queues(struct octeon_device *oct)
 {
        u32 mask;
 
@@ -353,6 +353,8 @@ void lio_cn6xxx_enable_io_queues(struct octeon_device *oct)
        mask = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
        mask |= oct->io_qmask.oq;
        octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, mask);
+
+       return 0;
 }
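
With the return type changed from void to int, a failure to bring the rings up can finally propagate to the caller; a hedged sketch of the call-site pattern this enables (the surrounding error handling is illustrative):

    /* Illustrative call site: bail out if the queues refuse to come up. */
    if (oct->fn_list.enable_io_queues(oct)) {
            dev_err(&oct->pci_dev->dev, "enabling IO queues failed\n");
            return -EIO;
    }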
 
 void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
@@ -418,36 +420,6 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
                octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT, d32);
 }
 
-void lio_cn6xxx_reinit_regs(struct octeon_device *oct)
-{
-       int i;
-
-       for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-               if (!(oct->io_qmask.iq & (1ULL << i)))
-                       continue;
-               oct->fn_list.setup_iq_regs(oct, i);
-       }
-
-       for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-               if (!(oct->io_qmask.oq & (1ULL << i)))
-                       continue;
-               oct->fn_list.setup_oq_regs(oct, i);
-       }
-
-       oct->fn_list.setup_device_regs(oct);
-
-       oct->fn_list.enable_interrupt(oct->chip);
-
-       oct->fn_list.enable_io_queues(oct);
-
-       /* for (i = 0; i < oct->num_oqs; i++) { */
-       for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-               if (!(oct->io_qmask.oq & (1ULL << i)))
-                       continue;
-               writel(oct->droq[i]->max_count, oct->droq[i]->pkts_credit_reg);
-       }
-}
-
 void
 lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct,
                          u64 core_addr,
@@ -507,18 +479,20 @@ lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq)
        return new_idx;
 }
 
-void lio_cn6xxx_enable_interrupt(void *chip)
+void lio_cn6xxx_enable_interrupt(struct octeon_device *oct,
+                                u8 unused __attribute__((unused)))
 {
-       struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip;
+       struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip;
        u64 mask = cn6xxx->intr_mask64 | CN6XXX_INTR_DMA0_FORCE;
 
        /* Enable Interrupt */
        writeq(mask, cn6xxx->intr_enb_reg64);
 }
 
-void lio_cn6xxx_disable_interrupt(void *chip)
+void lio_cn6xxx_disable_interrupt(struct octeon_device *oct,
+                                 u8 unused __attribute__((unused)))
 {
-       struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip;
+       struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip;
 
        /* Disable Interrupts */
        writeq(0, cn6xxx->intr_enb_reg64);
@@ -714,7 +688,6 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct)
 
        oct->fn_list.soft_reset = lio_cn6xxx_soft_reset;
        oct->fn_list.setup_device_regs = lio_cn6xxx_setup_device_regs;
-       oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs;
        oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index;
 
        oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup;
index 28c4722..a40a913 100644 (file)
@@ -80,18 +80,17 @@ void lio_cn6xxx_setup_global_input_regs(struct octeon_device *oct);
 void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct);
 void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no);
 void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no);
-void lio_cn6xxx_enable_io_queues(struct octeon_device *oct);
+int lio_cn6xxx_enable_io_queues(struct octeon_device *oct);
 void lio_cn6xxx_disable_io_queues(struct octeon_device *oct);
 irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev);
-void lio_cn6xxx_reinit_regs(struct octeon_device *oct);
 void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
                               u32 idx, int valid);
 void lio_cn6xxx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask);
 u32 lio_cn6xxx_bar1_idx_read(struct octeon_device *oct, u32 idx);
 u32
 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq);
-void lio_cn6xxx_enable_interrupt(void *chip);
-void lio_cn6xxx_disable_interrupt(void *chip);
+void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused);
+void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused);
 void cn6xxx_get_pcie_qlmport(struct octeon_device *oct);
 void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip,
                                  struct octeon_reg_list *reg_list);
index 29755bc..dbf3566 100644 (file)
@@ -148,7 +148,6 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct)
        oct->fn_list.process_interrupt_regs = lio_cn6xxx_process_interrupt_regs;
        oct->fn_list.soft_reset = lio_cn68xx_soft_reset;
        oct->fn_list.setup_device_regs = lio_cn68xx_setup_device_regs;
-       oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs;
        oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index;
 
        oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
new file mode 100644 (file)
index 0000000..201eddb
--- /dev/null
@@ -0,0 +1,266 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+#include <linux/pci.h>
+#include <linux/if_vlan.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_nic.h"
+#include "octeon_main.h"
+#include "octeon_network.h"
+
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+       int ret = 0;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+       nctrl.ncmd.u64 = 0;
+       nctrl.ncmd.s.cmd = cmd;
+       nctrl.ncmd.s.param1 = param1;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.wait_time = 100;
+       nctrl.netpndev = (u64)netdev;
+       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+       if (ret < 0) {
+               dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
+                       ret);
+       }
+       return ret;
+}
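
liquidio_set_feature() is the generic one-parameter control path used by several ndo callbacks; a hedged usage sketch (the command constant is defined elsewhere in the driver, and the param1 value shown is a placeholder, not the real LRO flag encoding):

    /* Illustrative: toggle LRO from feature-handling code. */
    if (liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, 0) < 0)
            netdev_warn(netdev, "LRO enable request failed\n");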
+
+void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
+                                       unsigned int bytes_compl)
+{
+       struct netdev_queue *netdev_queue = txq;
+
+       netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl);
+}
+
+void octeon_update_tx_completion_counters(void *buf, int reqtype,
+                                         unsigned int *pkts_compl,
+                                         unsigned int *bytes_compl)
+{
+       struct octnet_buf_free_info *finfo;
+       struct sk_buff *skb = NULL;
+       struct octeon_soft_command *sc;
+
+       switch (reqtype) {
+       case REQTYPE_NORESP_NET:
+       case REQTYPE_NORESP_NET_SG:
+               finfo = buf;
+               skb = finfo->skb;
+               break;
+
+       case REQTYPE_RESP_NET_SG:
+       case REQTYPE_RESP_NET:
+               sc = buf;
+               skb = sc->callback_arg;
+               break;
+
+       default:
+               return;
+       }
+
+       (*pkts_compl)++;
+/* TODO: use some other #define to indicate that IQs are not tied to
+ * netdevs and can take traffic from different netdevs; hence BQL
+ * reporting is done per packet rather than in bulk. The use of
+ * NO_NAPI in txq completion is a little confusing.
+ */
+       *bytes_compl += skb->len;
+}
+
+void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
+{
+       struct octnet_buf_free_info *finfo;
+       struct sk_buff *skb;
+       struct octeon_soft_command *sc;
+       struct netdev_queue *txq;
+
+       switch (reqtype) {
+       case REQTYPE_NORESP_NET:
+       case REQTYPE_NORESP_NET_SG:
+               finfo = buf;
+               skb = finfo->skb;
+               break;
+
+       case REQTYPE_RESP_NET_SG:
+       case REQTYPE_RESP_NET:
+               sc = buf;
+               skb = sc->callback_arg;
+               break;
+
+       default:
+               return;
+       }
+
+       txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb));
+       netdev_tx_sent_queue(txq, skb->len);
+}
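
Together with octeon_report_tx_completion_to_bql() above, these helpers form the standard BQL handshake: bytes are credited at transmit time and drained once the hardware confirms completion. The expected pairing, sketched (not literal driver code):

    /* Illustrative BQL pairing across the TX path:
     *   xmit:        octeon_report_sent_bytes_to_bql(buf, reqtype);
     *   completion:  octeon_update_tx_completion_counters(buf, reqtype,
     *                                                     &pkts, &bytes);
     *                octeon_report_tx_completion_to_bql(txq, pkts, bytes);
     */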
+
+void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
+{
+       struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr;
+       struct net_device *netdev = (struct net_device *)nctrl->netpndev;
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       u8 *mac;
+
+       switch (nctrl->ncmd.s.cmd) {
+       case OCTNET_CMD_CHANGE_DEVFLAGS:
+       case OCTNET_CMD_SET_MULTI_LIST:
+               break;
+
+       case OCTNET_CMD_CHANGE_MACADDR:
+               mac = ((u8 *)&nctrl->udd[0]) + 2;
+               netif_info(lio, probe, lio->netdev,
+                          "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
+                          mac[0], mac[1],
+                          mac[2], mac[3],
+                          mac[4], mac[5]);
+               break;
+
+       case OCTNET_CMD_CHANGE_MTU:
+               /* If command is successful, change the MTU. */
+               netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+                          netdev->mtu, nctrl->ncmd.s.param1);
+               dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+                        netdev->name, netdev->mtu,
+                        nctrl->ncmd.s.param1);
+               netdev->mtu = nctrl->ncmd.s.param1;
+               queue_delayed_work(lio->link_status_wq.wq,
+                                  &lio->link_status_wq.wk.work, 0);
+               break;
+
+       case OCTNET_CMD_GPIO_ACCESS:
+               netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
+               break;
+
+       case OCTNET_CMD_ID_ACTIVE:
+               netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
+               break;
+
+       case OCTNET_CMD_LRO_ENABLE:
+               dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
+               break;
+
+       case OCTNET_CMD_LRO_DISABLE:
+               dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
+                        netdev->name);
+               break;
+
+       case OCTNET_CMD_VERBOSE_ENABLE:
+               dev_info(&oct->pci_dev->dev, "%s Firmware debug enabled\n",
+                        netdev->name);
+               break;
+
+       case OCTNET_CMD_VERBOSE_DISABLE:
+               dev_info(&oct->pci_dev->dev, "%s Firmware debug disabled\n",
+                        netdev->name);
+               break;
+
+       case OCTNET_CMD_ENABLE_VLAN_FILTER:
+               dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n",
+                        netdev->name);
+               break;
+
+       case OCTNET_CMD_ADD_VLAN_FILTER:
+               dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n",
+                        netdev->name, nctrl->ncmd.s.param1);
+               break;
+
+       case OCTNET_CMD_DEL_VLAN_FILTER:
+               dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n",
+                        netdev->name, nctrl->ncmd.s.param1);
+               break;
+
+       case OCTNET_CMD_SET_SETTINGS:
+               dev_info(&oct->pci_dev->dev, "%s settings changed\n",
+                        netdev->name);
+
+               break;
+
+       /* Handle the "OCTNET_CMD_TNL_RX_CSUM_CTL"
+        * command passed by the NIC driver.
+        */
+       case OCTNET_CMD_TNL_RX_CSUM_CTL:
+               if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "RX Checksum Offload Enabled\n");
+               } else if (nctrl->ncmd.s.param1 ==
+                          OCTNET_CMD_RXCSUM_DISABLE) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "RX Checksum Offload Disabled\n");
+               }
+               break;
+
+               /* Handle the "OCTNET_CMD_TNL_TX_CSUM_CTL"
+                * command passed by the NIC driver.
+                */
+       case OCTNET_CMD_TNL_TX_CSUM_CTL:
+               if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "TX Checksum Offload Enabled\n");
+               } else if (nctrl->ncmd.s.param1 ==
+                          OCTNET_CMD_TXCSUM_DISABLE) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "TX Checksum Offload Disabled\n");
+               }
+               break;
+
+               /* Handle the "OCTNET_CMD_VXLAN_PORT_CONFIG"
+                * command passed by the NIC driver.
+                */
+       case OCTNET_CMD_VXLAN_PORT_CONFIG:
+               if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "VxLAN Destination UDP PORT:%d ADDED\n",
+                                  nctrl->ncmd.s.param1);
+               } else if (nctrl->ncmd.s.more ==
+                          OCTNET_CMD_VXLAN_PORT_DEL) {
+                       netif_info(lio, probe, lio->netdev,
+                                  "VxLAN Destination UDP PORT:%d DELETED\n",
+                                  nctrl->ncmd.s.param1);
+               }
+               break;
+
+       case OCTNET_CMD_SET_FLOW_CTL:
+               netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
+               break;
+
+       default:
+               dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__,
+                       nctrl->ncmd.s.cmd);
+       }
+}
index 289eb89..f163e0a 100644 (file)
@@ -32,6 +32,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 static int octnet_get_link_stats(struct net_device *netdev);
 
@@ -75,6 +76,7 @@ enum {
 
 #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
 #define OCT_ETHTOOL_REGDUMP_LEN  4096
+#define OCT_ETHTOOL_REGDUMP_LEN_23XX  (4096 * 11)
 #define OCT_ETHTOOL_REGSVER  1
 
 /* statistics of PF */
@@ -188,6 +190,10 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = {
        "buffer_alloc_failure",
 };
 
+/* LiquidIO driver private flags */
+static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = {
+};
+
 #define OCTNIC_NCMD_AUTONEG_ON  0x1
 #define OCTNIC_NCMD_PHY_ON      0x2
 
@@ -259,6 +265,13 @@ lio_ethtool_get_channels(struct net_device *dev,
                max_tx = CFG_GET_IQ_MAX_Q(conf6x);
                rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
                tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
+       } else if (OCTEON_CN23XX_PF(oct)) {
+               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+               max_rx = CFG_GET_OQ_MAX_Q(conf23);
+               max_tx = CFG_GET_IQ_MAX_Q(conf23);
+               rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx);
+               tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx);
        }
 
        channel->max_rx = max_rx;
@@ -290,18 +303,16 @@ lio_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct_dev = lio->oct_dev;
        struct octeon_board_info *board_info;
-       int len;
 
-       if (eeprom->offset != 0)
+       if (eeprom->offset)
                return -EINVAL;
 
        eeprom->magic = oct_dev->pci_dev->vendor;
        board_info = (struct octeon_board_info *)(&oct_dev->boardinfo);
-       len =
-               sprintf((char *)bytes,
-                       "boardname:%s serialnum:%s maj:%lld min:%lld\n",
-                       board_info->name, board_info->serial_number,
-                       board_info->major, board_info->minor);
+       sprintf((char *)bytes,
+               "boardname:%s serialnum:%s maj:%lld min:%lld\n",
+               board_info->name, board_info->serial_number,
+               board_info->major, board_info->minor);
 
        return 0;
 }
@@ -333,6 +344,32 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val)
        return 0;
 }
 
+static int octnet_id_active(struct net_device *netdev, int val)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+       int ret = 0;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+       nctrl.ncmd.u64 = 0;
+       nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE;
+       nctrl.ncmd.s.param1 = val;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.wait_time = 100;
+       nctrl.netpndev = (u64)netdev;
+       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+       if (ret < 0) {
+               dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /* Callback for when mdio command response arrives
  */
 static void octnet_mdio_resp_callback(struct octeon_device *oct,
@@ -406,7 +443,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value)
                dev_err(&oct_dev->pci_dev->dev,
                        "octnet_mdio45_access instruction failed status: %x\n",
                        retval);
-               retval =  -EBUSY;
+               retval = -EBUSY;
        } else {
                /* Sleep on a wait queue till the cond flag indicates that the
                 * response arrived
@@ -476,6 +513,11 @@ static int lio_set_phys_id(struct net_device *netdev,
                                                   &value);
                        if (ret)
                                return ret;
+               } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+                       octnet_id_active(netdev, LED_IDENTIFICATION_ON);
+
+                       /* returns 0 since updates are asynchronous */
+                       return 0;
                } else {
                        return -EINVAL;
                }
@@ -521,7 +563,10 @@ static int lio_set_phys_id(struct net_device *netdev,
                                                   &lio->phy_beacon_val);
                        if (ret)
                                return ret;
+               } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+                       octnet_id_active(netdev, LED_IDENTIFICATION_OFF);
 
+                       return 0;
                } else {
                        return -EINVAL;
                }
@@ -550,6 +595,13 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
                rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
                rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
                tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
+       } else if (OCTEON_CN23XX_PF(oct)) {
+               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+               tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
+               rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
+               rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx);
+               tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx);
        }
 
        if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) {
@@ -610,6 +662,69 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
        pause->rx_pause = oct->rx_pause;
 }
 
+static int
+lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+       /* Notes: Not supporting any auto negotiation in these
+        * drivers.
+        */
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+       struct oct_link_info *linfo = &lio->linfo;
+
+       int ret = 0;
+
+       if (oct->chip_id != OCTEON_CN23XX_PF_VID)
+               return -EINVAL;
+
+       if (linfo->link.s.duplex == 0) {
+               /* no flow control for half duplex */
+               if (pause->rx_pause || pause->tx_pause)
+                       return -EINVAL;
+       }
+
+       /* do not support autoneg of link flow control */
+       if (pause->autoneg == AUTONEG_ENABLE)
+               return -EINVAL;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+       nctrl.ncmd.u64 = 0;
+       nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.wait_time = 100;
+       nctrl.netpndev = (u64)netdev;
+       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+       if (pause->rx_pause) {
+               /* enable rx pause */
+               nctrl.ncmd.s.param1 = 1;
+       } else {
+               /* disable rx pause */
+               nctrl.ncmd.s.param1 = 0;
+       }
+
+       if (pause->tx_pause) {
+               /* enable tx pause */
+               nctrl.ncmd.s.param2 = 1;
+       } else {
+               /* disable tx pause */
+               nctrl.ncmd.s.param2 = 0;
+       }
+
+       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+       if (ret < 0) {
+               dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n");
+               return -EINVAL;
+       }
+
+       oct->rx_pause = pause->rx_pause;
+       oct->tx_pause = pause->tx_pause;
+
+       return 0;
+}
+
 static void
 lio_get_ethtool_stats(struct net_device *netdev,
                      struct ethtool_stats *stats  __attribute__((unused)),
@@ -877,6 +992,27 @@ lio_get_ethtool_stats(struct net_device *netdev,
        }
 }
 
+static void lio_get_priv_flags_strings(struct lio *lio, u8 *data)
+{
+       struct octeon_device *oct_dev = lio->oct_dev;
+       int i;
+
+       switch (oct_dev->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+               for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) {
+                       sprintf(data, "%s", oct_priv_flags_strings[i]);
+                       data += ETH_GSTRING_LEN;
+               }
+               break;
+       case OCTEON_CN68XX:
+       case OCTEON_CN66XX:
+               break;
+       default:
+               netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+               break;
+       }
+}
+
 static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
        struct lio *lio = GET_LIO(netdev);
@@ -916,12 +1052,31 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
                }
                break;
 
+       case ETH_SS_PRIV_FLAGS:
+               lio_get_priv_flags_strings(lio, data);
+               break;
        default:
                netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n");
                break;
        }
 }
 
+static int lio_get_priv_flags_ss_count(struct lio *lio)
+{
+       struct octeon_device *oct_dev = lio->oct_dev;
+
+       switch (oct_dev->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+               return ARRAY_SIZE(oct_priv_flags_strings);
+       case OCTEON_CN68XX:
+       case OCTEON_CN66XX:
+               return -EOPNOTSUPP;
+       default:
+               netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+               return -EOPNOTSUPP;
+       }
+}
+
 static int lio_get_sset_count(struct net_device *netdev, int sset)
 {
        struct lio *lio = GET_LIO(netdev);
@@ -932,6 +1087,8 @@ static int lio_get_sset_count(struct net_device *netdev, int sset)
                return (ARRAY_SIZE(oct_stats_strings) +
                        ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs +
                        ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs);
+       case ETH_SS_PRIV_FLAGS:
+               return lio_get_priv_flags_ss_count(lio);
        default:
                return -EOPNOTSUPP;
        }
@@ -948,6 +1105,16 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
        intrmod_cfg = &oct->intrmod;
 
        switch (oct->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+               if (!intrmod_cfg->rx_enable) {
+                       intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs;
+                       intr_coal->rx_max_coalesced_frames =
+                               intrmod_cfg->rx_frames;
+               }
+               if (!intrmod_cfg->tx_enable)
+                       intr_coal->tx_max_coalesced_frames =
+                               intrmod_cfg->tx_frames;
+               break;
        case OCTEON_CN68XX:
        case OCTEON_CN66XX: {
                struct octeon_cn6xxx *cn6xxx =
@@ -983,7 +1150,15 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
                intr_coal->rx_coalesce_usecs_low =
                        intrmod_cfg->rx_mintmr_trigger;
                intr_coal->rx_max_coalesced_frames_low =
-                       intrmod_cfg->rx_mincnt_trigger;
+                   intrmod_cfg->rx_mincnt_trigger;
+       }
+       if (OCTEON_CN23XX_PF(oct) &&
+           (intrmod_cfg->tx_enable)) {
+               intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable;
+               intr_coal->tx_max_coalesced_frames_high =
+                   intrmod_cfg->tx_maxcnt_trigger;
+               intr_coal->tx_max_coalesced_frames_low =
+                   intrmod_cfg->tx_mincnt_trigger;
        }
        return 0;
 }
@@ -1060,11 +1235,11 @@ static void
 octnet_nic_stats_callback(struct octeon_device *oct_dev,
                          u32 status, void *ptr)
 {
-       struct octeon_soft_command  *sc = (struct octeon_soft_command  *)ptr;
-       struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *)
-               sc->virtrptr;
-       struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *)
-               sc->ctxptr;
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+       struct oct_nic_stats_resp *resp =
+           (struct oct_nic_stats_resp *)sc->virtrptr;
+       struct oct_nic_stats_ctrl *ctrl =
+           (struct oct_nic_stats_ctrl *)sc->ctxptr;
        struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
        struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
 
@@ -1314,14 +1489,35 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
                CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames);
                break;
        }
+       case OCTEON_CN23XX_PF_VID: {
+               int q_no;
+
+               if (!intr_coal->rx_max_coalesced_frames)
+                       rx_max_coalesced_frames = oct->intrmod.rx_frames;
+               else
+                       rx_max_coalesced_frames =
+                           intr_coal->rx_max_coalesced_frames;
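+               /* CN23XX_SLI_OQ_PKT_INT_LEVELS holds the time threshold in
+                * the upper bits and the packet-count threshold in the low
+                * 32 bits; the read-modify-write below updates only the
+                * count field.
+                */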
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       q_no += oct->sriov_info.pf_srn;
+                       octeon_write_csr64(
+                           oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+                           (octeon_read_csr64(
+                                oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) &
+                            (0x3fffff00000000UL)) |
+                               rx_max_coalesced_frames);
+                       /* consider setting the resend bit */
+                       q_no -= oct->sriov_info.pf_srn;
+               }
+               oct->intrmod.rx_frames = rx_max_coalesced_frames;
+               break;
+       }
        default:
                return -EINVAL;
        }
        return 0;
 }
 
-static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce
-                              *intr_coal)
+static int oct_cfg_rx_intrtime(struct lio *lio,
+                              struct ethtool_coalesce *intr_coal)
 {
        struct octeon_device *oct = lio->oct_dev;
        u32 time_threshold, rx_coalesce_usecs;
@@ -1346,6 +1542,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce
                CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs);
                break;
        }
+       case OCTEON_CN23XX_PF_VID: {
+               u64 time_threshold;
+               int q_no;
+
+               if (!intr_coal->rx_coalesce_usecs)
+                       rx_coalesce_usecs = oct->intrmod.rx_usecs;
+               else
+                       rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
+               time_threshold =
+                   cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs);
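+               /* The time threshold (in OQ ticks) lives in the upper
+                * 32 bits of INT_LEVELS; repack it together with the
+                * current packet-count threshold so the count is kept.
+                */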
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       q_no += oct->sriov_info.pf_srn;
+                       octeon_write_csr64(oct,
+                                          CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+                                          (oct->intrmod.rx_frames |
+                                           (time_threshold << 32)));
+                       /* consider writing to the resend bit here */
+                       q_no -= oct->sriov_info.pf_srn;
+               }
+               oct->intrmod.rx_usecs = rx_coalesce_usecs;
+               break;
+       }
        default:
                return -EINVAL;
        }
@@ -1358,12 +1575,37 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal
                   __attribute__((unused)))
 {
        struct octeon_device *oct = lio->oct_dev;
+       u32 iq_intr_pkt;
+       void __iomem *inst_cnt_reg;
+       u64 val;
 
        /* Config Cnt based interrupt values */
        switch (oct->chip_id) {
        case OCTEON_CN68XX:
        case OCTEON_CN66XX:
                break;
+       case OCTEON_CN23XX_PF_VID: {
+               int q_no;
+
+               if (!intr_coal->tx_max_coalesced_frames)
+                       iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD &
+                                     CN23XX_PKT_IN_DONE_WMARK_MASK;
+               else
+                       iq_intr_pkt = intr_coal->tx_max_coalesced_frames &
+                                     CN23XX_PKT_IN_DONE_WMARK_MASK;
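+               /* The watermark shares the IQ instruction-count register
+                * with the done count, so the requested frame count is
+                * masked to the WMARK field width before being programmed.
+                */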
+               for (q_no = 0; q_no < oct->num_iqs; q_no++) {
+                       inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg;
+                       val = readq(inst_cnt_reg);
+                       /* clear wmark and count; don't want to write count back */
+                       val = (val & 0xFFFF000000000000ULL) |
+                             ((u64)iq_intr_pkt
+                              << CN23XX_PKT_IN_DONE_WMARK_BIT_POS);
+                       writeq(val, inst_cnt_reg);
+                       /* consider setting the resend bit */
+               }
+               oct->intrmod.tx_frames = iq_intr_pkt;
+               break;
+       }
        default:
                return -EINVAL;
        }
@@ -1399,6 +1641,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
                        return -EINVAL;
                }
                break;
+       case OCTEON_CN23XX_PF_VID:
+               break;
        default:
                return -EINVAL;
        }
@@ -1541,9 +1785,237 @@ static int lio_nway_reset(struct net_device *netdev)
 }
 
 /* Return register dump len. */
-static int lio_get_regs_len(struct net_device *dev __attribute__((unused)))
+static int lio_get_regs_len(struct net_device *dev)
 {
-       return OCT_ETHTOOL_REGDUMP_LEN;
+       struct lio *lio = GET_LIO(dev);
+       struct octeon_device *oct = lio->oct_dev;
+
+       switch (oct->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+               return OCT_ETHTOOL_REGDUMP_LEN_23XX;
+       default:
+               return OCT_ETHTOOL_REGDUMP_LEN;
+       }
+}
+
+static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct)
+{
+       u32 reg;
+       u8 pf_num = oct->pf_num;
+       int len = 0;
+       int i;
+
+       /* PCI Window Registers */
+
+       len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n");
+
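+       /* Each line below dumps one CSR as "[address] (NAME): value". */
+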
+       /*0x29030 or 0x29040*/
+       reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num);
+       len += sprintf(s + len,
+                      "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n",
+                      reg, oct->pcie_port, oct->pf_num,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x27080 or 0x27090*/
+       reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
+       len +=
+           sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n",
+                   reg, oct->pcie_port, oct->pf_num,
+                   (u64)octeon_read_csr64(oct, reg));
+
+       /*0x27000 or 0x27010*/
+       reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num);
+       len +=
+           sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n",
+                   reg, oct->pcie_port, oct->pf_num,
+                   (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29120*/
+       reg = 0x29120;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x27300*/
+       reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+             (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+       len += sprintf(
+           s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg,
+           oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg));
+
+       /*0x27200*/
+       reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+             (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+       len += sprintf(s + len,
+                      "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n",
+                      reg, oct->pcie_port, oct->pf_num,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29130*/
+       reg = CN23XX_SLI_PKT_CNT_INT;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29140*/
+       reg = CN23XX_SLI_PKT_TIME_INT;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29160*/
+       reg = 0x29160;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29180*/
+       reg = CN23XX_SLI_OQ_WMARK;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n",
+                      reg, (u64)octeon_read_csr64(oct, reg));
+
+       /*0x291E0*/
+       reg = CN23XX_SLI_PKT_IOQ_RING_RST;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29210*/
+       reg = CN23XX_SLI_GBL_CONTROL;
+       len += sprintf(s + len,
+                      "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg,
+                      (u64)octeon_read_csr64(oct, reg));
+
+       /*0x29220*/
+       reg = 0x29220;
+       len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n",
+                      reg, (u64)octeon_read_csr64(oct, reg));
+
+       /*PF only*/
+       if (pf_num == 0) {
+               /*0x29260*/
+               reg = CN23XX_SLI_OUT_BP_EN_W1S;
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S):  %016llx\n",
+                              reg, (u64)octeon_read_csr64(oct, reg));
+       } else if (pf_num == 1) {
+               /*0x29270*/
+               reg = CN23XX_SLI_OUT_BP_EN2_W1S;
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n",
+                              reg, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i);
+               len +=
+                   sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n",
+                           reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10040*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_IQ_INSTR_COUNT64(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10080*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_PKTS_CREDIT(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10090*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_SIZE(i);
+               len += sprintf(
+                   s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n",
+                   reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10050*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_PKT_CONTROL(i);
+               len += sprintf(
+                       s + len,
+                       "\n[%08x] (SLI_PKT%d_OUTPUT_CONTROL): %016llx\n",
+                       reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10070*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_BASE_ADDR64(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x100a0*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x100b0*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_OQ_PKTS_SENT(i);
+               len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x100c0*/
+       for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+               reg = 0x100c0 + i * CN23XX_OQ_OFFSET;
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10000*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_IQ_PKT_CONTROL64(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10010*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_IQ_BASE_ADDR64(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10020*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_IQ_DOORBELL(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       /*0x10030*/
+       for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+               reg = CN23XX_SLI_IQ_SIZE(i);
+               len += sprintf(s + len,
+                              "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n",
+                              reg, i, (u64)octeon_read_csr64(oct, reg));
+       }
+
+       return len;
 }
 
 static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct)
@@ -1688,6 +2160,10 @@ static void lio_get_regs(struct net_device *dev,
        regs->version = OCT_ETHTOOL_REGSVER;
 
        switch (oct->chip_id) {
+       case OCTEON_CN23XX_PF_VID:
+               memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX);
+               len += cn23xx_read_csr_reg(regbuf + len, oct);
+               break;
        case OCTEON_CN68XX:
        case OCTEON_CN66XX:
                memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
@@ -1729,6 +2205,7 @@ static const struct ethtool_ops lio_ethtool_ops = {
        .get_strings            = lio_get_strings,
        .get_ethtool_stats      = lio_get_ethtool_stats,
        .get_pauseparam         = lio_get_pauseparam,
+       .set_pauseparam         = lio_set_pauseparam,
        .get_regs_len           = lio_get_regs_len,
        .get_regs               = lio_get_regs,
        .get_msglevel           = lio_get_msglevel,
index 20d6942..afc6f9d 100644 (file)
 **********************************************************************/
 #include <linux/version.h>
 #include <linux/pci.h>
-#include <linux/net_tstamp.h>
-#include <linux/if_vlan.h>
 #include <linux/firmware.h>
 #include <linux/ptp_clock_kernel.h>
 #include <net/vxlan.h>
+#include <linux/kthread.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -37,6 +36,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn68xx_device.h"
+#include "cn23xx_pf_device.h"
 #include "liquidio_image.h"
 
 MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
@@ -52,11 +52,6 @@ module_param(ddr_timeout, int, 0644);
 MODULE_PARM_DESC(ddr_timeout,
                 "Number of milliseconds to wait for DDR initialization. 0 waits for ddr_timeout to be set to non-zero value before starting to check");
 
-static u32 console_bitmask;
-module_param(console_bitmask, int, 0644);
-MODULE_PARM_DESC(console_bitmask,
-                "Bitmask indicating which consoles have debug output redirected to syslog.");
-
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
 #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
@@ -102,6 +97,14 @@ struct liquidio_if_cfg_resp {
        u64 status;
 };
 
+struct liquidio_rx_ctl_context {
+       int octeon_id;
+
+       wait_queue_head_t wc;
+
+       int cond;
+};
+
 struct oct_link_status_resp {
        u64 rh;
        struct oct_link_info link_info;
@@ -139,7 +142,8 @@ union tx_info {
 #define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
 
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
-#define OCTNIC_GSO_MAX_SIZE (GSO_MAX_SIZE - OCTNIC_GSO_MAX_HEADER_SIZE)
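+/* Bound GSO by the 23xx default input jabber size rather than GSO_MAX_SIZE,
+ * so a maximally sized GSO frame still fits the input ring limit (assumed
+ * rationale for this change).
+ */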
+#define OCTNIC_GSO_MAX_SIZE                                                    \
+       (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
 /** Structure of a node in list of gather components maintained by
  * NIC driver for each network device.
@@ -162,27 +166,6 @@ struct octnic_gather {
        u64 sg_dma_ptr;
 };
 
-/** This structure is used by NIC driver to store information required
- * to free the sk_buff when the packet has been fetched by Octeon.
- * Bytes offset below assume worst-case of a 64-bit system.
- */
-struct octnet_buf_free_info {
-       /** Bytes 1-8.  Pointer to network device private structure. */
-       struct lio *lio;
-
-       /** Bytes 9-16.  Pointer to sk_buff. */
-       struct sk_buff *skb;
-
-       /** Bytes 17-24.  Pointer to gather list. */
-       struct octnic_gather *g;
-
-       /** Bytes 25-32. Physical address of skb->data or gather list. */
-       u64 dptr;
-
-       /** Bytes 33-47. Piggybacked soft command, if any */
-       struct octeon_soft_command *sc;
-};
-
 struct handshake {
        struct completion init;
        struct completion started;
@@ -198,6 +181,7 @@ struct octeon_device_priv {
 };
 
 static int octeon_device_init(struct octeon_device *);
+static int liquidio_stop(struct net_device *netdev);
 static void liquidio_remove(struct pci_dev *pdev);
 static int liquidio_probe(struct pci_dev *pdev,
                          const struct pci_device_id *ent);
@@ -219,6 +203,20 @@ static void octeon_droq_bh(unsigned long pdev)
                        continue;
                reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no],
                                                          MAX_PACKET_BUDGET);
+               lio_enable_irq(oct->droq[q_no], NULL);
+
+               if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
+                       /* set time and cnt interrupt thresholds for this DROQ
+                        * for NAPI
+                        */
+                       int adjusted_q_no = q_no + oct->sriov_info.pf_srn;
+
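+                       /* 0x5700000040: time threshold 0x57 in the upper
+                        * 32 bits and packet-count threshold 0x40 in the
+                        * low bits, used as the default NAPI trigger
+                        * levels (assumed field encoding).
+                        */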
+                       octeon_write_csr64(
+                           oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(adjusted_q_no),
+                           0x5700000040ULL);
+                       octeon_write_csr64(
+                           oct, CN23XX_SLI_OQ_PKTS_SENT(adjusted_q_no), 0);
+               }
        }
 
        if (reschedule)
@@ -252,76 +250,6 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
        return pkt_cnt;
 }
 
-void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
-                                       unsigned int bytes_compl)
-{
-       struct netdev_queue *netdev_queue = txq;
-
-       netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl);
-}
-
-void octeon_update_tx_completion_counters(void *buf, int reqtype,
-                                         unsigned int *pkts_compl,
-                                         unsigned int *bytes_compl)
-{
-       struct octnet_buf_free_info *finfo;
-       struct sk_buff *skb = NULL;
-       struct octeon_soft_command *sc;
-
-       switch (reqtype) {
-       case REQTYPE_NORESP_NET:
-       case REQTYPE_NORESP_NET_SG:
-               finfo = buf;
-               skb = finfo->skb;
-               break;
-
-       case REQTYPE_RESP_NET_SG:
-       case REQTYPE_RESP_NET:
-               sc = buf;
-               skb = sc->callback_arg;
-               break;
-
-       default:
-               return;
-       }
-
-       (*pkts_compl)++;
-       *bytes_compl += skb->len;
-}
-
-void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
-{
-       struct octnet_buf_free_info *finfo;
-       struct sk_buff *skb;
-       struct octeon_soft_command *sc;
-       struct netdev_queue *txq;
-
-       switch (reqtype) {
-       case REQTYPE_NORESP_NET:
-       case REQTYPE_NORESP_NET_SG:
-               finfo = buf;
-               skb = finfo->skb;
-               break;
-
-       case REQTYPE_RESP_NET_SG:
-       case REQTYPE_RESP_NET:
-               sc = buf;
-               skb = sc->callback_arg;
-               break;
-
-       default:
-               return;
-       }
-
-       txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb));
-       netdev_tx_sent_queue(txq, skb->len);
-}
-
-int octeon_console_debug_enabled(u32 console)
-{
-       return (console_bitmask >> (console)) & 0x1;
-}
-
 /**
  * \brief Forces all IO queues off on a given device
  * @param oct Pointer to Octeon device
@@ -441,7 +369,7 @@ static void stop_pci_io(struct octeon_device *oct)
        pci_disable_device(oct->pci_dev);
 
        /* Disable interrupts  */
-       oct->fn_list.disable_interrupt(oct->chip);
+       oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
        pcierror_quiesce_device(oct);
 
@@ -570,6 +498,9 @@ static const struct pci_device_id liquidio_pci_tbl[] = {
        {       /* 66xx */
                PCI_VENDOR_ID_CAVIUM, 0x92, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0
        },
+       {       /* 23xx pf */
+               PCI_VENDOR_ID_CAVIUM, 0x9702, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0
+       },
        {
                0, 0, 0, 0, 0, 0, 0
        }
@@ -587,7 +518,6 @@ static struct pci_driver liquidio_pci_driver = {
        .suspend        = liquidio_suspend,
        .resume         = liquidio_resume,
 #endif
-
 };
 
 /**
@@ -935,6 +865,52 @@ static void print_link_info(struct net_device *netdev)
        }
 }
 
+/**
+ * \brief Routine to notify MTU change
+ * @param work work_struct data structure
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+       struct cavium_wk *wk = (struct cavium_wk *)work;
+       struct lio *lio = (struct lio *)wk->ctxptr;
+
+       rtnl_lock();
+       call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+       rtnl_unlock();
+}
+
+/**
+ * \brief Sets up the MTU status change work
+ * @param netdev network device
+ */
+static inline int setup_link_status_change_wq(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+
+       lio->link_status_wq.wq = alloc_workqueue("link-status",
+                                                WQ_MEM_RECLAIM, 0);
+       if (!lio->link_status_wq.wq) {
+               dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
+               return -1;
+       }
+       INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+                         octnet_link_status_change);
+       lio->link_status_wq.wk.ctxptr = lio;
+
+       return 0;
+}
+
+static inline void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+       struct lio *lio = GET_LIO(netdev);
+
+       if (lio->link_status_wq.wq) {
+               cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+               destroy_workqueue(lio->link_status_wq.wq);
+       }
+}
+
 /**
  * \brief Update link status
  * @param netdev network device
@@ -973,8 +949,6 @@ static void update_txq_status(struct octeon_device *oct, int iq_num)
        struct lio *lio;
        struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
 
-       /*octeon_update_iq_read_idx(oct, iq);*/
-
        netdev = oct->props[iq->ifidx].netdev;
 
        /* This is needed because the first IQ does not have
@@ -1002,12 +976,32 @@ static void update_txq_status(struct octeon_device *oct, int iq_num)
        }
 }
 
+static
+int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
+{
+       struct octeon_device *oct = droq->oct_dev;
+       struct octeon_device_priv *oct_priv =
+           (struct octeon_device_priv *)oct->priv;
+
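+       /* A DROQ in poll mode is drained by its NAPI handler; otherwise an
+        * output-packet interrupt (MSIX_PO_INT) kicks the droq tasklet,
+        * while input completions (MSIX_PI_INT) are left to the periodic
+        * IQ doorbell check.
+        */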
+       if (droq->ops.poll_mode) {
+               droq->ops.napi_fn(droq);
+       } else {
+               if (ret & MSIX_PO_INT) {
+                       tasklet_schedule(&oct_priv->droq_tasklet);
+                       return 1;
+               }
+               /* this will be flushed periodically by the IQ doorbell check */
+               if (ret & MSIX_PI_INT)
+                       return 0;
+       }
+       return 0;
+}
+
 /**
  * \brief Droq packet processor scheduler
  * @param oct octeon device
  */
-static
-void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
+static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
 {
        struct octeon_device_priv *oct_priv =
                (struct octeon_device_priv *)oct->priv;
@@ -1032,19 +1026,36 @@ void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
        }
 }
 
+static irqreturn_t
+liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
+{
+       u64 ret;
+       struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+       struct octeon_device *oct = ioq_vector->oct_dev;
+       struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+
+       ret = oct->fn_list.msix_interrupt_handler(ioq_vector);
+
+       if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT))
+               liquidio_schedule_msix_droq_pkt_handler(droq, ret);
+
+       return IRQ_HANDLED;
+}
+
 /**
  * \brief Interrupt handler for octeon
  * @param irq unused
  * @param dev octeon device
  */
 static
-irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev)
+irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)),
+                                        void *dev)
 {
        struct octeon_device *oct = (struct octeon_device *)dev;
        irqreturn_t ret;
 
        /* Disable our interrupts for the duration of ISR */
-       oct->fn_list.disable_interrupt(oct->chip);
+       oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
        ret = oct->fn_list.process_interrupt_regs(oct);
 
@@ -1053,7 +1064,7 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev)
 
        /* Re-enable our interrupts  */
        if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET))
-               oct->fn_list.enable_interrupt(oct->chip);
+               oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
 
        return ret;
 }
@@ -1067,22 +1078,204 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev)
 static int octeon_setup_interrupt(struct octeon_device *oct)
 {
        int irqret, err;
+       struct msix_entry *msix_entries;
+       int i;
+       int num_ioq_vectors;
+       int num_alloc_ioq_vectors;
 
-       err = pci_enable_msi(oct->pci_dev);
-       if (err)
-               dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
-                        err);
-       else
-               oct->flags |= LIO_FLAG_MSI_ENABLED;
-
-       irqret = request_irq(oct->pci_dev->irq, liquidio_intr_handler,
-                            IRQF_SHARED, "octeon", oct);
-       if (irqret) {
-               if (oct->flags & LIO_FLAG_MSI_ENABLED)
-                       pci_disable_msi(oct->pci_dev);
-               dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
-                       irqret);
-               return 1;
+       if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
+               oct->num_msix_irqs = oct->sriov_info.num_pf_rings;
+               /* one non-ioq interrupt for handling sli_mac_pf_int_sum */
+               oct->num_msix_irqs += 1;
+
+               oct->msix_entries = kcalloc(
+                   oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
+               if (!oct->msix_entries)
+                       return 1;
+
+               msix_entries = (struct msix_entry *)oct->msix_entries;
+               /* Assumption: the PF's MSI-X vector entries run from
+                * sriov_info.pf_srn up through sriov_info.trs, not from 0;
+                * if that changes, update this code.
+                */
+               for (i = 0; i < oct->num_msix_irqs - 1; i++)
+                       msix_entries[i].entry = oct->sriov_info.pf_srn + i;
+               msix_entries[oct->num_msix_irqs - 1].entry =
+                   oct->sriov_info.trs;
+               num_alloc_ioq_vectors = pci_enable_msix_range(
+                                               oct->pci_dev, msix_entries,
+                                               oct->num_msix_irqs,
+                                               oct->num_msix_irqs);
+               if (num_alloc_ioq_vectors < 0) {
+                       dev_err(&oct->pci_dev->dev, "unable to allocate MSI-X interrupts\n");
+                       kfree(oct->msix_entries);
+                       oct->msix_entries = NULL;
+                       return 1;
+               }
+               dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
+
+               num_ioq_vectors = oct->num_msix_irqs;
+
+               /* For PF, there is one non-ioq interrupt handler */
+               num_ioq_vectors -= 1;
+               irqret = request_irq(msix_entries[num_ioq_vectors].vector,
+                                    liquidio_legacy_intr_handler, 0, "octeon",
+                                    oct);
+               if (irqret) {
+                       dev_err(&oct->pci_dev->dev,
+                               "OCTEON: request_irq failed for MSI-X interrupt, error: %d\n",
+                               irqret);
+                       pci_disable_msix(oct->pci_dev);
+                       kfree(oct->msix_entries);
+                       oct->msix_entries = NULL;
+                       return 1;
+               }
+
+               for (i = 0; i < num_ioq_vectors; i++) {
+                       irqret = request_irq(msix_entries[i].vector,
+                                            liquidio_msix_intr_handler, 0,
+                                            "octeon", &oct->ioq_vector[i]);
+                       if (irqret) {
+                               dev_err(&oct->pci_dev->dev,
+                                       "OCTEON: request_irq failed for MSI-X interrupt, error: %d\n",
+                                       irqret);
+                               /* free the non-ioq irq vector here */
+                               free_irq(msix_entries[num_ioq_vectors].vector,
+                                        oct);
+
+                               while (i) {
+                                       i--;
+                                       /* clear the affinity mask */
+                                       irq_set_affinity_hint(
+                                               msix_entries[i].vector, NULL);
+                                       free_irq(msix_entries[i].vector,
+                                                &oct->ioq_vector[i]);
+                               }
+                               pci_disable_msix(oct->pci_dev);
+                               kfree(oct->msix_entries);
+                               oct->msix_entries = NULL;
+                               return 1;
+                       }
+                       oct->ioq_vector[i].vector = msix_entries[i].vector;
+                       /* assign the cpu mask for this msix interrupt vector */
+                       irq_set_affinity_hint(
+                                       msix_entries[i].vector,
+                                       (&oct->ioq_vector[i].affinity_mask));
+               }
+               dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n",
+                       oct->octeon_id);
+       } else {
+               err = pci_enable_msi(oct->pci_dev);
+               if (err)
+                       dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
+                                err);
+               else
+                       oct->flags |= LIO_FLAG_MSI_ENABLED;
+
+               irqret = request_irq(oct->pci_dev->irq,
+                                    liquidio_legacy_intr_handler, IRQF_SHARED,
+                                    "octeon", oct);
+               if (irqret) {
+                       if (oct->flags & LIO_FLAG_MSI_ENABLED)
+                               pci_disable_msi(oct->pci_dev);
+                       dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
+                               irqret);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+static int liquidio_watchdog(void *param)
+{
+       u64 wdog;
+       u16 mask_of_stuck_cores = 0;
+       u16 mask_of_crashed_cores = 0;
+       int core_num;
+       u8 core_is_stuck[LIO_MAX_CORES];
+       u8 core_crashed[LIO_MAX_CORES];
+       struct octeon_device *oct = param;
+
+       memset(core_is_stuck, 0, sizeof(core_is_stuck));
+       memset(core_crashed, 0, sizeof(core_crashed));
+
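+       /* Poll loop: the firmware publishes a crashed-core bitmask in
+        * SLI_SCRATCH2, and a nonzero state field in a core's CIU3_WDOG
+        * register means that core's watchdog has expired.
+        */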
+       while (!kthread_should_stop()) {
+               mask_of_crashed_cores =
+                   (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);
+
+               for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) {
+                       if (!core_is_stuck[core_num]) {
+                               wdog = lio_pci_readq(oct, CIU3_WDOG(core_num));
+
+                               /* look at watchdog state field */
+                               wdog &= CIU3_WDOG_MASK;
+                               if (wdog) {
+                                       /* this watchdog timer has expired */
+                                       core_is_stuck[core_num] =
+                                               LIO_MONITOR_WDOG_EXPIRE;
+                                       mask_of_stuck_cores |= (1 << core_num);
+                               }
+                       }
+
+                       if (!core_crashed[core_num])
+                               core_crashed[core_num] =
+                                   (mask_of_crashed_cores >> core_num) & 1;
+               }
+
+               if (mask_of_stuck_cores) {
+                       for (core_num = 0; core_num < LIO_MAX_CORES;
+                            core_num++) {
+                               if (core_is_stuck[core_num] == 1) {
+                                       dev_err(&oct->pci_dev->dev,
+                                               "ERROR: Octeon core %d is stuck!\n",
+                                               core_num);
+                                       /* 2 means we have printk'd an error
+                                        * so no need to repeat the same printk
+                                        */
+                                       core_is_stuck[core_num] =
+                                               LIO_MONITOR_CORE_STUCK_MSGD;
+                               }
+                       }
+               }
+
+               if (mask_of_crashed_cores) {
+                       for (core_num = 0; core_num < LIO_MAX_CORES;
+                            core_num++) {
+                               if (core_crashed[core_num] == 1) {
+                                       dev_err(&oct->pci_dev->dev,
+                                               "ERROR: Octeon core %d crashed!  See oct-fwdump for details.\n",
+                                               core_num);
+                                       /* 2 means we have printk'd an error
+                                        * so no need to repeat the same printk
+                                        */
+                                       core_crashed[core_num] =
+                                               LIO_MONITOR_CORE_STUCK_MSGD;
+                               }
+                       }
+               }
+#ifdef CONFIG_MODULE_UNLOAD
+               if (mask_of_stuck_cores || mask_of_crashed_cores) {
+                       /* make module refcount=0 so that rmmod will work */
+                       long refcount;
+
+                       refcount = module_refcount(THIS_MODULE);
+
+                       while (refcount > 0) {
+                               module_put(THIS_MODULE);
+                               refcount = module_refcount(THIS_MODULE);
+                       }
+
+                       /* compensate for and withstand an unlikely (but still
+                        * possible) race condition
+                        */
+                       while (refcount < 0) {
+                               try_module_get(THIS_MODULE);
+                               refcount = module_refcount(THIS_MODULE);
+                       }
+               }
+#endif
+               /* sleep for two seconds */
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(2 * HZ);
        }
 
        return 0;
@@ -1107,6 +1300,9 @@ liquidio_probe(struct pci_dev *pdev,
                return -ENOMEM;
        }
 
+       if (pdev->device == OCTEON_CN23XX_PF_VID)
+               oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED;
+
        dev_info(&pdev->dev, "Initializing device %x:%x.\n",
                 (u32)pdev->vendor, (u32)pdev->device);
 
@@ -1130,6 +1326,30 @@ liquidio_probe(struct pci_dev *pdev,
                return -ENOMEM;
        }
 
+       if (OCTEON_CN23XX_PF(oct_dev)) {
+               u64 scratch1;
+               u8 bus, device, function;
+
+               scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1);
+               if (!(scratch1 & 4ULL)) {
+                       /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that
+                        * the lio watchdog kernel thread is running for this
+                        * NIC.  Each NIC gets one watchdog kernel thread.
+                        */
+                       scratch1 |= 4ULL;
+                       octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1,
+                                          scratch1);
+
+                       bus = pdev->bus->number;
+                       device = PCI_SLOT(pdev->devfn);
+                       function = PCI_FUNC(pdev->devfn);
+                       oct_dev->watchdog_task = kthread_create(
+                           liquidio_watchdog, oct_dev,
+                           "liowd/%02hhx:%02hhx.%hhx", bus, device, function);
+                       wake_up_process(oct_dev->watchdog_task);
+               }
+       }
+
        oct_dev->rx_pause = 1;
        oct_dev->tx_pause = 1;
 
@@ -1146,6 +1366,7 @@ liquidio_probe(struct pci_dev *pdev,
 static void octeon_destroy_resources(struct octeon_device *oct)
 {
        int i;
+       struct msix_entry *msix_entries;
        struct octeon_device_priv *oct_priv =
                (struct octeon_device_priv *)oct->priv;
 
@@ -1190,21 +1411,40 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                        dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
 
                /* Disable interrupts  */
-               oct->fn_list.disable_interrupt(oct->chip);
+               oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+               if (oct->msix_on) {
+                       msix_entries = (struct msix_entry *)oct->msix_entries;
+                       for (i = 0; i < oct->num_msix_irqs - 1; i++) {
+                               /* clear the affinity_cpumask */
+                               irq_set_affinity_hint(msix_entries[i].vector,
+                                                     NULL);
+                               free_irq(msix_entries[i].vector,
+                                        &oct->ioq_vector[i]);
+                       }
+                       /* non-ioq vector's argument is oct struct */
+                       free_irq(msix_entries[i].vector, oct);
 
-               /* Release the interrupt line */
-               free_irq(oct->pci_dev->irq, oct);
+                       pci_disable_msix(oct->pci_dev);
+                       kfree(oct->msix_entries);
+                       oct->msix_entries = NULL;
+               } else {
+                       /* Release the interrupt line */
+                       free_irq(oct->pci_dev->irq, oct);
 
-               if (oct->flags & LIO_FLAG_MSI_ENABLED)
-                       pci_disable_msi(oct->pci_dev);
+                       if (oct->flags & LIO_FLAG_MSI_ENABLED)
+                               pci_disable_msi(oct->pci_dev);
+               }
 
-               /* fallthrough */
+               if (OCTEON_CN23XX_PF(oct))
+                       octeon_free_ioq_vector(oct);
+       /* fallthrough */
        case OCT_DEV_IN_RESET:
        case OCT_DEV_DROQ_INIT_DONE:
                /*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/
                mdelay(100);
                for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-                       if (!(oct->io_qmask.oq & (1ULL << i)))
+                       if (!(oct->io_qmask.oq & BIT_ULL(i)))
                                continue;
                        octeon_delete_droq(oct, i);
                }
@@ -1225,17 +1465,16 @@ static void octeon_destroy_resources(struct octeon_device *oct)
        case OCT_DEV_RESP_LIST_INIT_DONE:
                octeon_delete_response_list(oct);
 
-               /* fallthrough */
-       case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
-               octeon_free_sc_buffer_pool(oct);
-
                /* fallthrough */
        case OCT_DEV_INSTR_QUEUE_INIT_DONE:
                for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-                       if (!(oct->io_qmask.iq & (1ULL << i)))
+                       if (!(oct->io_qmask.iq & BIT_ULL(i)))
                                continue;
                        octeon_delete_instr_queue(oct, i);
                }
+               /* fallthrough */
+       case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
+               octeon_free_sc_buffer_pool(oct);
 
                /* fallthrough */
        case OCT_DEV_DISPATCH_INIT_DONE:
@@ -1244,9 +1483,9 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 
                /* fallthrough */
        case OCT_DEV_PCI_MAP_DONE:
-
                /* Soft reset the octeon device before exiting */
-               oct->fn_list.soft_reset(oct);
+               if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id)
+                       oct->fn_list.soft_reset(oct);
 
                octeon_unmap_pci_barx(oct, 0);
                octeon_unmap_pci_barx(oct, 1);
@@ -1263,6 +1502,34 @@ static void octeon_destroy_resources(struct octeon_device *oct)
        tasklet_kill(&oct_priv->droq_tasklet);
 }
 
+/**
+ * \brief Callback for rx ctrl
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+                           u32 status,
+                           void *buf)
+{
+       struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+       struct liquidio_rx_ctl_context *ctx;
+
+       ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+       oct = lio_get_device(ctx->octeon_id);
+       if (status)
+               dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+                       CVM_CAST64(status));
+       WRITE_ONCE(ctx->cond, 1);
+
+       /* This barrier is required to be sure that the response has been
+        * written fully before waking up the handler
+        */
+       wmb();
+
+       wake_up_interruptible(&ctx->wc);
+}
+
 /**
  * \brief Send Rx control command
  * @param lio per-network private data
@@ -1270,17 +1537,55 @@ static void octeon_destroy_resources(struct octeon_device *oct)
  */
 static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
-       struct octnic_ctrl_pkt nctrl;
+       struct octeon_soft_command *sc;
+       struct liquidio_rx_ctl_context *ctx;
+       union octnet_cmd *ncmd;
+       int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+       struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+       int retval;
 
-       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+       if (oct->props[lio->ifidx].rx_on == start_stop)
+               return;
 
-       nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL;
-       nctrl.ncmd.s.param1 = start_stop;
-       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.netpndev = (u64)lio->netdev;
+       sc = (struct octeon_soft_command *)
+               octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+                                         16, ctx_size);
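+       /* octeon_alloc_soft_command() can return NULL; bail out rather
+        * than dereference sc below.
+        */
+       if (!sc)
+               return;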
 
-       if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
+       ncmd = (union octnet_cmd *)sc->virtdptr;
+       ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+       WRITE_ONCE(ctx->cond, 0);
+       ctx->octeon_id = lio_get_device_id(oct);
+       init_waitqueue_head(&ctx->wc);
+
+       ncmd->u64 = 0;
+       ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+       ncmd->s.param1 = start_stop;
+
+       octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+       sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+       octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+                                   OPCODE_NIC_CMD, 0, 0, 0);
+
+       sc->callback = rx_ctl_callback;
+       sc->callback_arg = sc;
+       sc->wait_time = 5000;
+
+       retval = octeon_send_soft_command(oct, sc);
+       if (retval == IQ_SEND_FAILED) {
                netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+       } else {
+               /* Sleep on a wait queue till the cond flag indicates that the
+                * response arrived or timed-out.
+                */
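+               /* On -EINTR, sc is left unfreed: the completion callback
+                * may still run and touch it (assumed rationale; the
+                * command is leaked in that case).
+                */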
+               if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+                       return;
+               oct->props[lio->ifidx].rx_on = start_stop;
+       }
+
+       octeon_free_soft_command(oct, sc);
 }
 
 /**
@@ -1307,21 +1612,24 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
        dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n");
 
-       send_rx_ctrl_cmd(lio, 0);
-
        if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
-               txqs_stop(netdev);
+               liquidio_stop(netdev);
 
        if (oct->props[lio->ifidx].napi_enabled == 1) {
                list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
                        napi_disable(napi);
 
                oct->props[lio->ifidx].napi_enabled = 0;
+
+               if (OCTEON_CN23XX_PF(oct))
+                       oct->droq[0]->ops.poll_mode = 0;
        }
 
        if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
                unregister_netdev(netdev);
 
+       cleanup_link_status_change_wq(netdev);
+
        delete_glists(lio);
 
        free_netdev(netdev);
@@ -1374,6 +1682,9 @@ static void liquidio_remove(struct pci_dev *pdev)
 
        dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n");
 
+       if (oct_dev->watchdog_task)
+               kthread_stop(oct_dev->watchdog_task);
+
        if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP))
                liquidio_stop_nic_module(oct_dev);
 
@@ -1417,6 +1728,12 @@ static int octeon_chip_specific_setup(struct octeon_device *oct)
                s = "CN66XX";
                break;
 
+       case OCTEON_CN23XX_PCIID_PF:
+               oct->chip_id = OCTEON_CN23XX_PF_VID;
+               ret = setup_cn23xx_octeon_pf_device(oct);
+               s = "CN23XX";
+               break;
+
        default:
                s = "?";
                dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n",
@@ -1867,7 +2184,7 @@ static void if_cfg_callback(struct octeon_device *oct,
        struct liquidio_if_cfg_context *ctx;
 
        resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-       ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+       ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
 
        oct = lio_get_device(ctx->octeon_id);
        if (resp->status)
@@ -2060,11 +2377,14 @@ static void napi_schedule_wrapper(void *param)
  */
 static void liquidio_napi_drv_callback(void *arg)
 {
+       struct octeon_device *oct;
        struct octeon_droq *droq = arg;
        int this_cpu = smp_processor_id();
 
-       if (droq->cpu_id == this_cpu) {
-               napi_schedule(&droq->napi);
+       oct = droq->oct_dev;
+
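+       /* On the 23xx PF each MSI-X vector is affinitized to its DROQ's
+        * CPU, so NAPI can be scheduled directly with irqs off instead of
+        * bouncing through a cross-CPU smp call (assumed rationale).
+        */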
+       if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
+               napi_schedule_irqoff(&droq->napi);
        } else {
                struct call_single_data *csd = &droq->csd;
 
@@ -2173,17 +2493,15 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
                                                   lio->ifidx), NULL);
                if (retval) {
                        dev_err(&octeon_dev->pci_dev->dev,
-                               " %s : Runtime DROQ(RxQ) creation failed.\n",
+                               "%s : Runtime DROQ(RxQ) creation failed.\n",
                                __func__);
                        return 1;
                }
 
                droq = octeon_dev->droq[q_no];
                napi = &droq->napi;
-               dev_dbg(&octeon_dev->pci_dev->dev,
-                       "netif_napi_add netdev:%llx oct:%llx\n",
-                       (u64)netdev,
-                       (u64)octeon_dev);
+               dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx pf_num:%d\n",
+                       (u64)netdev, (u64)octeon_dev, octeon_dev->pf_num);
                netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
 
                /* designate a CPU for this droq */
@@ -2195,6 +2513,14 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev,
                octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
        }
 
+       if (OCTEON_CN23XX_PF(octeon_dev)) {
+               /* 23XX PF can receive control messages (via the first PF-owned
+                * droq) from the firmware even if the ethX interface is down,
+                * so that's why poll_mode must be off for the first droq.
+                */
+               octeon_dev->droq[0]->ops.poll_mode = 0;
+       }
+
        /* set up IQs. */
        for (q = 0; q < lio->linfo.num_txpciq; q++) {
                num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
@@ -2235,7 +2561,7 @@ static void octnet_poll_check_txq_status(struct work_struct *work)
  * \brief Sets up the txq poll check
  * @param netdev network device
  */
-static inline void setup_tx_poll_fn(struct net_device *netdev)
+static inline int setup_tx_poll_fn(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
@@ -2244,21 +2570,24 @@ static inline void setup_tx_poll_fn(struct net_device *netdev)
                                                WQ_MEM_RECLAIM, 0);
        if (!lio->txq_status_wq.wq) {
                dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n");
-               return;
+               return -1;
        }
        INIT_DELAYED_WORK(&lio->txq_status_wq.wk.work,
                          octnet_poll_check_txq_status);
        lio->txq_status_wq.wk.ctxptr = lio;
        queue_delayed_work(lio->txq_status_wq.wq,
                           &lio->txq_status_wq.wk.work, msecs_to_jiffies(1));
+       return 0;
 }
 
 static inline void cleanup_tx_poll_fn(struct net_device *netdev)
 {
        struct lio *lio = GET_LIO(netdev);
 
-       cancel_delayed_work_sync(&lio->txq_status_wq.wk.work);
-       destroy_workqueue(lio->txq_status_wq.wq);
+       if (lio->txq_status_wq.wq) {
+               cancel_delayed_work_sync(&lio->txq_status_wq.wk.work);
+               destroy_workqueue(lio->txq_status_wq.wq);
+       }
 }
 
 /**
@@ -2276,24 +2605,34 @@ static int liquidio_open(struct net_device *netdev)
                        napi_enable(napi);
 
                oct->props[lio->ifidx].napi_enabled = 1;
+
+               if (OCTEON_CN23XX_PF(oct))
+                       oct->droq[0]->ops.poll_mode = 1;
        }
 
        oct_ptp_open(netdev);
 
        ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
-       setup_tx_poll_fn(netdev);
-
-       start_txq(netdev);
+       /* Ready for link status updates */
+       lio->intf_open = 1;
 
        netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
+       if (OCTEON_CN23XX_PF(oct)) {
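+               /* With MSI-X, TX completions are interrupt driven on the
+                * 23xx PF, so the polled txq-status workqueue is only
+                * needed when MSI-X is off (assumed rationale).
+                */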
+               if (!oct->msix_on)
+                       if (setup_tx_poll_fn(netdev))
+                               return -1;
+       } else {
+               if (setup_tx_poll_fn(netdev))
+                       return -1;
+       }
+
+       start_txq(netdev);
+
        /* tell Octeon to start forwarding packets to host */
        send_rx_ctrl_cmd(lio, 1);
 
-       /* Ready for link status updates */
-       lio->intf_open = 1;
-
        dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
                 netdev->name);
 
@@ -2328,7 +2667,12 @@ static int liquidio_stop(struct net_device *netdev)
        /* Now it should be safe to tell Octeon that nic interface is down. */
        send_rx_ctrl_cmd(lio, 0);
 
-       cleanup_tx_poll_fn(netdev);
+       if (OCTEON_CN23XX_PF(oct)) {
+               if (!oct->msix_on)
+                       cleanup_tx_poll_fn(netdev);
+       } else {
+               cleanup_tx_poll_fn(netdev);
+       }
 
        if (lio->ptp_clock) {
                ptp_clock_unregister(lio->ptp_clock);
@@ -2340,143 +2684,6 @@ static int liquidio_stop(struct net_device *netdev)
        return 0;
 }
 
-void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
-{
-       struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr;
-       struct net_device *netdev = (struct net_device *)nctrl->netpndev;
-       struct lio *lio = GET_LIO(netdev);
-       struct octeon_device *oct = lio->oct_dev;
-       u8 *mac;
-
-       switch (nctrl->ncmd.s.cmd) {
-       case OCTNET_CMD_CHANGE_DEVFLAGS:
-       case OCTNET_CMD_SET_MULTI_LIST:
-               break;
-
-       case OCTNET_CMD_CHANGE_MACADDR:
-               mac = ((u8 *)&nctrl->udd[0]) + 2;
-               netif_info(lio, probe, lio->netdev,
-                          "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
-                          "MACAddr changed to", mac[0], mac[1],
-                          mac[2], mac[3], mac[4], mac[5]);
-               break;
-
-       case OCTNET_CMD_CHANGE_MTU:
-               /* If command is successful, change the MTU. */
-               netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n",
-                          netdev->mtu, nctrl->ncmd.s.param1);
-               dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
-                        netdev->name, netdev->mtu,
-                        nctrl->ncmd.s.param1);
-               rtnl_lock();
-               netdev->mtu = nctrl->ncmd.s.param1;
-               call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev);
-               rtnl_unlock();
-               break;
-
-       case OCTNET_CMD_GPIO_ACCESS:
-               netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
-
-               break;
-
-       case OCTNET_CMD_LRO_ENABLE:
-               dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
-               break;
-
-       case OCTNET_CMD_LRO_DISABLE:
-               dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
-                        netdev->name);
-               break;
-
-       case OCTNET_CMD_VERBOSE_ENABLE:
-               dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
-               break;
-
-       case OCTNET_CMD_VERBOSE_DISABLE:
-               dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
-                        netdev->name);
-               break;
-
-       case OCTNET_CMD_ENABLE_VLAN_FILTER:
-               dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n",
-                        netdev->name);
-               break;
-
-       case OCTNET_CMD_ADD_VLAN_FILTER:
-               dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n",
-                        netdev->name, nctrl->ncmd.s.param1);
-               break;
-
-       case OCTNET_CMD_DEL_VLAN_FILTER:
-               dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n",
-                        netdev->name, nctrl->ncmd.s.param1);
-               break;
-
-       case OCTNET_CMD_SET_SETTINGS:
-               dev_info(&oct->pci_dev->dev, "%s settings changed\n",
-                        netdev->name);
-
-               break;
-               /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL"
-                * Command passed by NIC driver
-                */
-       case OCTNET_CMD_TNL_RX_CSUM_CTL:
-               if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s RX Checksum Offload Enabled\n",
-                                  netdev->name);
-               } else if (nctrl->ncmd.s.param1 ==
-                          OCTNET_CMD_RXCSUM_DISABLE) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s RX Checksum Offload Disabled\n",
-                                  netdev->name);
-               }
-               break;
-
-               /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL"
-                * Command passed by NIC driver
-                */
-       case OCTNET_CMD_TNL_TX_CSUM_CTL:
-               if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s TX Checksum Offload Enabled\n",
-                                  netdev->name);
-               } else if (nctrl->ncmd.s.param1 ==
-                          OCTNET_CMD_TXCSUM_DISABLE) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s TX Checksum Offload Disabled\n",
-                                  netdev->name);
-               }
-               break;
-
-               /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG"
-                * Command passed by NIC driver
-                */
-       case OCTNET_CMD_VXLAN_PORT_CONFIG:
-               if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s VxLAN Destination UDP PORT:%d ADDED\n",
-                                  netdev->name,
-                                  nctrl->ncmd.s.param1);
-               } else if (nctrl->ncmd.s.more ==
-                          OCTNET_CMD_VXLAN_PORT_DEL) {
-                       netif_info(lio, probe, lio->netdev,
-                                  "%s VxLAN Destination UDP PORT:%d DELETED\n",
-                                  netdev->name,
-                                  nctrl->ncmd.s.param1);
-               }
-               break;
-
-       case OCTNET_CMD_SET_FLOW_CTL:
-               netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
-               break;
-
-       default:
-               dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__,
-                       nctrl->ncmd.s.cmd);
-       }
-}
-
 /**
  * \brief Converts a mask based on net device flags
  * @param netdev network device
@@ -2817,8 +3024,7 @@ static void handle_timestamp(struct octeon_device *oct,
  */
 static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
                                         struct octnic_data_pkt *ndata,
-                                        struct octnet_buf_free_info *finfo,
-                                        int xmit_more)
+                                        struct octnet_buf_free_info *finfo)
 {
        int retval;
        struct octeon_soft_command *sc;
@@ -2846,9 +3052,15 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
        sc->callback_arg = finfo->skb;
        sc->iq_no = ndata->q_no;
 
-       len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz;
+       if (OCTEON_CN23XX_PF(oct))
+               len = (u32)((struct octeon_instr_ih3 *)
+                           (&sc->cmd.cmd3.ih3))->dlengsz;
+       else
+               len = (u32)((struct octeon_instr_ih2 *)
+                           (&sc->cmd.cmd2.ih2))->dlengsz;
+
+       ring_doorbell = 1;
 
-       ring_doorbell = !xmit_more;
        retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
                                     sc, len, ndata->reqtype);
 
@@ -2881,7 +3093,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
        union tx_info *tx_info;
        int status = 0;
        int q_idx = 0, iq_no = 0;
-       int xmit_more, j;
+       int j;
        u64 dptr = 0;
        u32 tag = 0;
 
@@ -2980,7 +3192,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                        return NETDEV_TX_BUSY;
                }
 
-               ndata.cmd.cmd2.dptr = dptr;
+               if (OCTEON_CN23XX_PF(oct))
+                       ndata.cmd.cmd3.dptr = dptr;
+               else
+                       ndata.cmd.cmd2.dptr = dptr;
                finfo->dptr = dptr;
                ndata.reqtype = REQTYPE_NORESP_NET;
 
@@ -3055,15 +3270,23 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                                           g->sg_size, DMA_TO_DEVICE);
                dptr = g->sg_dma_ptr;
 
-               ndata.cmd.cmd2.dptr = dptr;
+               if (OCTEON_CN23XX_PF(oct))
+                       ndata.cmd.cmd3.dptr = dptr;
+               else
+                       ndata.cmd.cmd2.dptr = dptr;
                finfo->dptr = dptr;
                finfo->g = g;
 
                ndata.reqtype = REQTYPE_NORESP_NET_SG;
        }
 
-       irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
-       tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+       if (OCTEON_CN23XX_PF(oct)) {
+               irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+               tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+       } else {
+               irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
+               tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+       }
 
        if (skb_shinfo(skb)->gso_size) {
                tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
@@ -3077,12 +3300,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                irh->vlan = skb_vlan_tag_get(skb) & 0xfff;
        }
 
-       xmit_more = skb->xmit_more;
-
        if (unlikely(cmdsetup.s.timestamp))
-               status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
+               status = send_nic_timestamp_pkt(oct, &ndata, finfo);
        else
-               status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
+               status = octnet_send_nic_data_pkt(oct, &ndata);
        if (status == IQ_SEND_FAILED)
                goto lio_xmit_failed;
 
@@ -3190,8 +3411,8 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev,
  *                              OCTNET_CMD_RXCSUM_DISABLE
  * @returns                     SUCCESS or FAILURE
  */
-int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
-                               u8 rx_cmd)
+static int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
+                                      u8 rx_cmd)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
@@ -3249,31 +3470,6 @@ static int liquidio_vxlan_port_command(struct net_device *netdev, int command,
        return ret;
 }
 
-int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
-{
-       struct lio *lio = GET_LIO(netdev);
-       struct octeon_device *oct = lio->oct_dev;
-       struct octnic_ctrl_pkt nctrl;
-       int ret = 0;
-
-       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-       nctrl.ncmd.u64 = 0;
-       nctrl.ncmd.s.cmd = cmd;
-       nctrl.ncmd.s.param1 = param1;
-       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-       nctrl.wait_time = 100;
-       nctrl.netpndev = (u64)netdev;
-       nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-       ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-       if (ret < 0) {
-               dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
-                       ret);
-       }
-       return ret;
-}
-
 /** \brief Net device fix features
  * @param netdev  pointer to network device
  * @param request features requested
@@ -3492,8 +3688,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        union oct_nic_if_cfg if_cfg;
        unsigned int base_queue;
        unsigned int gmx_port_id;
-       u32 resp_size, ctx_size;
+       u32 resp_size, ctx_size, data_size;
        u32 ifidx_or_pfnum;
+       struct lio_version *vdata;
 
        /* This is to handle link status changes */
        octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
@@ -3515,21 +3712,37 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        for (i = 0; i < octeon_dev->ifcount; i++) {
                resp_size = sizeof(struct liquidio_if_cfg_resp);
                ctx_size = sizeof(struct liquidio_if_cfg_context);
+               data_size = sizeof(struct lio_version);
                sc = (struct octeon_soft_command *)
-                       octeon_alloc_soft_command(octeon_dev, 0,
+                       octeon_alloc_soft_command(octeon_dev, data_size,
                                                  resp_size, ctx_size);
                resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
                ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+               vdata = (struct lio_version *)sc->virtdptr;
+
+               *((u64 *)vdata) = 0;
+               vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+               vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+               vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
 
-               num_iqueues =
-                       CFG_GET_NUM_TXQS_NIC_IF(octeon_get_conf(octeon_dev), i);
-               num_oqueues =
-                       CFG_GET_NUM_RXQS_NIC_IF(octeon_get_conf(octeon_dev), i);
-               base_queue =
-                       CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i);
-               gmx_port_id =
-                       CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i);
-               ifidx_or_pfnum = i;
+               if (OCTEON_CN23XX_PF(octeon_dev)) {
+                       num_iqueues = octeon_dev->sriov_info.num_pf_rings;
+                       num_oqueues = octeon_dev->sriov_info.num_pf_rings;
+                       base_queue = octeon_dev->sriov_info.pf_srn;
+
+                       gmx_port_id = octeon_dev->pf_num;
+                       ifidx_or_pfnum = octeon_dev->pf_num;
+               } else {
+                       num_iqueues = CFG_GET_NUM_TXQS_NIC_IF(
+                                               octeon_get_conf(octeon_dev), i);
+                       num_oqueues = CFG_GET_NUM_RXQS_NIC_IF(
+                                               octeon_get_conf(octeon_dev), i);
+                       base_queue = CFG_GET_BASE_QUE_NIC_IF(
+                                               octeon_get_conf(octeon_dev), i);
+                       gmx_port_id = CFG_GET_GMXID_NIC_IF(
+                                               octeon_get_conf(octeon_dev), i);
+                       ifidx_or_pfnum = i;
+               }
 
                dev_dbg(&octeon_dev->pci_dev->dev,
                        "requesting config for interface %d, iqs %d, oqs %d\n",
@@ -3566,7 +3779,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                /* Sleep on a wait queue till the cond flag indicates that the
                 * response arrived or timed-out.
                 */
-               sleep_cond(&ctx->wc, &ctx->cond);
+               if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+                       dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
+                       goto setup_nic_wait_intr;
+               }
+
                retval = resp->status;
                if (retval) {
                        dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
@@ -3633,12 +3850,16 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
                lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
-               lio->dev_capability = NETIF_F_HIGHDMA
-                               | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-                               | NETIF_F_SG | NETIF_F_RXCSUM
-                               | NETIF_F_GRO
-                               | NETIF_F_TSO | NETIF_F_TSO6
-                               | NETIF_F_LRO;
+               if (OCTEON_CN23XX_PF(octeon_dev) ||
+                   OCTEON_CN6XXX(octeon_dev)) {
+                       lio->dev_capability = NETIF_F_HIGHDMA
+                                             | NETIF_F_IP_CSUM
+                                             | NETIF_F_IPV6_CSUM
+                                             | NETIF_F_SG | NETIF_F_RXCSUM
+                                             | NETIF_F_GRO
+                                             | NETIF_F_TSO | NETIF_F_TSO6
+                                             | NETIF_F_LRO;
+               }
                netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
 
                /*  Copy of transmit encapsulation capabilities:
@@ -3713,7 +3934,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
                /* Register ethtool support */
                liquidio_set_ethtool_ops(netdev);
-               octeon_dev->priv_flags = 0x0;
+               if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID)
+                       octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT;
+               else
+                       octeon_dev->priv_flags = 0x0;
 
                if (netdev->features & NETIF_F_LRO)
                        liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
@@ -3725,6 +3949,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                        liquidio_set_feature(netdev,
                                             OCTNET_CMD_VERBOSE_ENABLE, 0);
 
+               if (setup_link_status_change_wq(netdev))
+                       goto setup_nic_dev_fail;
+
                /* Register the network device with the OS */
                if (register_netdev(netdev)) {
                        dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
@@ -3760,6 +3987,8 @@ setup_nic_dev_fail:
 
        octeon_free_soft_command(octeon_dev, sc);
 
+setup_nic_wait_intr:
+
        while (i--) {
                dev_err(&octeon_dev->pci_dev->dev,
                        "NIC ifidx:%d Setup failed\n", i);
@@ -3789,8 +4018,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
        /* run port_config command for each port */
        oct->ifcount = num_nic_ports;
 
-       memset(oct->props, 0,
-              sizeof(struct octdev_props) * num_nic_ports);
+       memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports);
 
        for (i = 0; i < MAX_OCTEON_LINKS; i++)
                oct->props[i].gmxport = -1;
@@ -3806,7 +4034,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
        /* Initialize interrupt moderation params */
        intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
        intrmod_cfg->rx_enable = 1;
-       intrmod_cfg->check_intrvl =   LIO_INTRMOD_CHECK_INTERVAL;
+       intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
        intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
        intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
        intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
@@ -3818,6 +4046,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct)
        intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
        intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
        intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+       intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
        dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
 
        return retval;
@@ -3880,6 +4109,7 @@ static void nic_starter(struct work_struct *work)
 static int octeon_device_init(struct octeon_device *octeon_dev)
 {
        int j, ret;
+       int fw_loaded = 0;
        char bootcmd[] = "\n";
        struct octeon_device_priv *oct_priv =
                (struct octeon_device_priv *)octeon_dev->priv;
@@ -3901,9 +4131,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
        octeon_dev->app_mode = CVM_DRV_INVALID_APP;
 
-       /* Do a soft reset of the Octeon device. */
-       if (octeon_dev->fn_list.soft_reset(octeon_dev))
+       if (OCTEON_CN23XX_PF(octeon_dev)) {
+               if (!cn23xx_fw_loaded(octeon_dev)) {
+                       fw_loaded = 0;
+                       /* Do a soft reset of the Octeon device. */
+                       if (octeon_dev->fn_list.soft_reset(octeon_dev))
+                               return 1;
+                       /* things might have changed */
+                       if (!cn23xx_fw_loaded(octeon_dev))
+                               fw_loaded = 0;
+                       else
+                               fw_loaded = 1;
+               } else {
+                       fw_loaded = 1;
+               }
+       } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) {
                return 1;
+       }
 
        /* Initialize the dispatch mechanism used to push packets arriving on
         * Octeon Output queues.
@@ -3925,6 +4169,22 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
        octeon_set_io_queues_off(octeon_dev);
 
+       if (OCTEON_CN23XX_PF(octeon_dev)) {
+               ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Failed to configure device registers\n");
+                       return ret;
+               }
+       }
+
+       /* Initialize soft command buffer pool */
+       if (octeon_setup_sc_buffer_pool(octeon_dev)) {
+               dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n");
+               return 1;
+       }
+       atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
+
        /*  Setup the data structures that manage this Octeon's Input queues. */
        if (octeon_setup_instr_queues(octeon_dev)) {
                dev_err(&octeon_dev->pci_dev->dev,
@@ -3936,14 +4196,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        }
        atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
 
-       /* Initialize soft command buffer pool
-        */
-       if (octeon_setup_sc_buffer_pool(octeon_dev)) {
-               dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n");
-               return 1;
-       }
-       atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
-
        /* Initialize lists to manage the requests of different types that
         * arrive from user & kernel applications for this octeon device.
         */
@@ -3963,15 +4215,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
        atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE);
 
-       /* The input and output queue registers were setup earlier (the queues
-        * were not enabled). Any additional registers that need to be
-        * programmed should be done now.
-        */
-       ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
-       if (ret) {
-               dev_err(&octeon_dev->pci_dev->dev,
-                       "Failed to configure device registers\n");
-               return ret;
+       if (OCTEON_CN23XX_PF(octeon_dev)) {
+               if (octeon_allocate_ioq_vector(octeon_dev)) {
+                       dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
+                       return 1;
+               }
+
+       } else {
+               /* The input and output queue registers were setup earlier (the
+                * queues were not enabled). Any additional registers
+                * that need to be programmed should be done now.
+                */
+               ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev,
+                               "Failed to configure device registers\n");
+                       return ret;
+               }
        }
 
        /* Initialize the tasklet that handles output queue packet processing.*/
@@ -3985,63 +4245,76 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
                return 1;
 
        /* Enable Octeon device interrupts */
-       octeon_dev->fn_list.enable_interrupt(octeon_dev->chip);
+       octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR);
 
        /* Enable the input and output queues for this Octeon device */
-       octeon_dev->fn_list.enable_io_queues(octeon_dev);
+       ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);
+       if (ret) {
+               dev_err(&octeon_dev->pci_dev->dev, "Failed to enable input/output queues");
+               return ret;
+       }
 
        atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE);
 
-       dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
-
-       if (ddr_timeout == 0)
-               dev_info(&octeon_dev->pci_dev->dev, "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n");
+       if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) {
+               dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
+               if (!ddr_timeout) {
+                       dev_info(&octeon_dev->pci_dev->dev,
+                                "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n");
+               }
 
-       schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS);
+               schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS);
 
-       /* Wait for the octeon to initialize DDR after the soft-reset. */
-       while (ddr_timeout == 0) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (schedule_timeout(HZ / 10)) {
-                       /* user probably pressed Control-C */
+               /* Wait for the octeon to initialize DDR after the soft-reset. */
+               while (!ddr_timeout) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (schedule_timeout(HZ / 10)) {
+                               /* user probably pressed Control-C */
+                               return 1;
+                       }
+               }
+               ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev,
+                               "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n",
+                               ret);
                        return 1;
                }
-       }
-       ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout);
-       if (ret) {
-               dev_err(&octeon_dev->pci_dev->dev,
-                       "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n",
-                       ret);
-               return 1;
-       }
 
-       if (octeon_wait_for_bootloader(octeon_dev, 1000) != 0) {
-               dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n");
-               return 1;
-       }
+               if (octeon_wait_for_bootloader(octeon_dev, 1000)) {
+                       dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n");
+                       return 1;
+               }
 
-       /* Divert uboot to take commands from host instead. */
-       ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50);
+               /* Divert uboot to take commands from host instead. */
+               ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50);
 
-       dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n");
-       ret = octeon_init_consoles(octeon_dev);
-       if (ret) {
-               dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n");
-               return 1;
-       }
-       ret = octeon_add_console(octeon_dev, 0);
-       if (ret) {
-               dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n");
-               return 1;
-       }
+               dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n");
+               ret = octeon_init_consoles(octeon_dev);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n");
+                       return 1;
+               }
+               ret = octeon_add_console(octeon_dev, 0);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n");
+                       return 1;
+               }
 
-       atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE);
+               atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE);
 
-       dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n");
-       ret = load_firmware(octeon_dev);
-       if (ret) {
-               dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
-               return 1;
+               dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n");
+               ret = load_firmware(octeon_dev);
+               if (ret) {
+                       dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
+                       return 1;
+               }
+               /* set bit 1 of SLI_SCRATCH_1 to indicate that firmware is
+                * loaded
+                */
+               if (OCTEON_CN23XX_PF(octeon_dev))
+                       octeon_write_csr64(octeon_dev, CN23XX_SLI_SCRATCH1,
+                                          2ULL);
        }
 
        handshake[octeon_dev->octeon_id].init_ok = 1;
@@ -4057,7 +4330,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
                       octeon_dev->droq[j]->pkts_credit_reg);
 
        /* Packets can start arriving on the output queues from this point. */
-
        return 0;
 }
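
The fw_loaded logic above hinges on a flag the driver leaves for itself in SLI_SCRATCH_1: after a successful load it writes 2ULL (bit 1), and on a later probe cn23xx_fw_loaded() can see the bit and skip the soft reset, DDR wait, console setup and firmware reload. A plausible shape for that check, assuming the octeon_read_csr64() counterpart of the write helper used above (a sketch, not necessarily the driver's exact code):

    static int cn23xx_fw_loaded_sketch(struct octeon_device *oct)
    {
            u64 val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);

            return (val >> 1) & 1ULL;       /* bit 1: firmware already loaded */
    }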
 
index 199a8b9..0d990ac 100644 (file)
 
 #include "octeon_config.h"
 
-#define LIQUIDIO_BASE_VERSION   "1.4"
-#define LIQUIDIO_MICRO_VERSION  ".1"
 #define LIQUIDIO_PACKAGE ""
-#define LIQUIDIO_VERSION  "1.4.1"
+#define LIQUIDIO_BASE_MAJOR_VERSION 1
+#define LIQUIDIO_BASE_MINOR_VERSION 4
+#define LIQUIDIO_BASE_MICRO_VERSION 1
+#define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
+                               __stringify(LIQUIDIO_BASE_MINOR_VERSION)
+#define LIQUIDIO_MICRO_VERSION  "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
+#define LIQUIDIO_VERSION        LIQUIDIO_PACKAGE \
+                               __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
+                               __stringify(LIQUIDIO_BASE_MINOR_VERSION) \
+                               "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
+
+struct lio_version {
+       u16  major;
+       u16  minor;
+       u16  micro;
+       u16  reserved;
+};
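
The split into numeric major/minor/micro macros lets the preprocessor build the version strings, while struct lio_version carries the same numbers in binary (big-endian u16s, filled in and sent to the firmware in setup_nic_devices()). A worked expansion of the string macros above, with LIQUIDIO_PACKAGE empty:

    /*
     * __stringify(1) yields "1", and adjacent string literals concatenate:
     *
     *   LIQUIDIO_BASE_VERSION  -> "1" "." "4"             == "1.4"
     *   LIQUIDIO_MICRO_VERSION -> "." "1"                 == ".1"
     *   LIQUIDIO_VERSION       -> "" "1" "." "4" "." "1"  == "1.4.1"
     */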
 
 #define CONTROL_IQ 0
 /** Tag types used by Octeon cores in its work. */
@@ -218,6 +232,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 #define   OCTNET_CMD_ADD_VLAN_FILTER  0x17
 #define   OCTNET_CMD_DEL_VLAN_FILTER  0x18
 #define   OCTNET_CMD_VXLAN_PORT_CONFIG 0x19
+
+#define   OCTNET_CMD_ID_ACTIVE         0x1a
+
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
 #define   OCTNET_CMD_RXCSUM_ENABLE     0x0
@@ -296,6 +313,13 @@ union octnet_cmd {
 
 #define   OCTNET_CMD_SIZE     (sizeof(union octnet_cmd))
 
+/* pki_ih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+#define LIO_SOFTCMDRESP_IH2       40
+#define LIO_SOFTCMDRESP_IH3       (40 + 8)
+
+#define LIO_PCICMD_O2             24
+#define LIO_PCICMD_O3             (24 + 8)
+
 /* Instruction Header(DPI) - for OCTEON-III models */
 struct  octeon_instr_ih3 {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -814,6 +838,8 @@ struct oct_link_stats {
 #define VITESSE_PHY_GPIO_DRIVEOFF 0x4
 #define VITESSE_PHY_GPIO_HIGH     0x2
 #define VITESSE_PHY_GPIO_LOW      0x3
+#define LED_IDENTIFICATION_ON     0x1
+#define LED_IDENTIFICATION_OFF    0x0
 
 struct oct_mdio_cmd {
        u64 op;
@@ -832,7 +858,7 @@ struct oct_mdio_cmd {
 /* intrmod: max. packets to trigger interrupt */
 #define LIO_INTRMOD_RXMAXCNT_TRIGGER   384
 /* intrmod: min. packets to trigger interrupt */
-#define LIO_INTRMOD_RXMINCNT_TRIGGER   1
+#define LIO_INTRMOD_RXMINCNT_TRIGGER   0
 /* intrmod: max. time to trigger interrupt */
 #define LIO_INTRMOD_RXMAXTMR_TRIGGER   128
 /* 66xx:intrmod: min. time to trigger interrupt
index b3396e3..c765568 100644 (file)
 #define   DEFAULT_NUM_NIC_PORTS_68XX   4
 #define   DEFAULT_NUM_NIC_PORTS_68XX_210NV  2
 
+/* CN23xx  IQ configuration macros */
+#define   CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12
+#define   CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32
+#define   CN23XX_MAX_RINGS_PER_PF          64
+
+#define   CN23XX_MAX_INPUT_QUEUES      CN23XX_MAX_RINGS_PER_PF
+#define   CN23XX_MAX_IQ_DESCRIPTORS    2048
+#define   CN23XX_DB_MIN                 1
+#define   CN23XX_DB_MAX                 8
+#define   CN23XX_DB_TIMEOUT             1
+
+#define   CN23XX_MAX_OUTPUT_QUEUES     CN23XX_MAX_RINGS_PER_PF
+#define   CN23XX_MAX_OQ_DESCRIPTORS    2048
+#define   CN23XX_OQ_BUF_SIZE           1536
+#define   CN23XX_OQ_PKTSPER_INTR       128
+/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
+#define   CN23XX_OQ_REFIL_THRESHOLD    128
+
+#define   CN23XX_OQ_INTR_PKT           64
+#define   CN23XX_OQ_INTR_TIME          100
+#define   DEFAULT_NUM_NIC_PORTS_23XX   1
+
+#define   CN23XX_CFG_IO_QUEUES         CN23XX_MAX_RINGS_PER_PF
+/* PEMs count */
+#define   CN23XX_MAX_MACS              4
+
+#define   CN23XX_DEF_IQ_INTR_THRESHOLD 32
+#define   CN23XX_DEF_IQ_INTR_BYTE_THRESHOLD   (64 * 1024)
 /* common OCTEON configuration macros */
 #define   CN6XXX_CFG_IO_QUEUES         32
 #define   OCTEON_32BYTE_INSTR          32
 #define CFG_GET_IQ_DB_MIN(cfg)                   ((cfg)->iq.db_min)
 #define CFG_GET_IQ_DB_TIMEOUT(cfg)               ((cfg)->iq.db_timeout)
 
+#define CFG_GET_IQ_INTR_PKT(cfg)                 ((cfg)->iq.iq_intr_pkt)
+#define CFG_SET_IQ_INTR_PKT(cfg, val)            (cfg)->iq.iq_intr_pkt = val
+
 #define CFG_GET_OQ_MAX_Q(cfg)                    ((cfg)->oq.max_oqs)
 #define CFG_GET_OQ_INFO_PTR(cfg)                 ((cfg)->oq.info_ptr)
 #define CFG_GET_OQ_PKTS_PER_INTR(cfg)            ((cfg)->oq.pkts_per_intr)
 enum lio_card_type {
        LIO_210SV = 0, /* Two port, 66xx */
        LIO_210NV,     /* Two port, 68xx */
-       LIO_410NV      /* Four port, 68xx */
+       LIO_410NV,     /* Four port, 68xx */
+       LIO_23XX       /* 23xx */
 };
 
 #define LIO_210SV_NAME "210sv"
 #define LIO_210NV_NAME "210nv"
 #define LIO_410NV_NAME "410nv"
+#define LIO_23XX_NAME  "23xx"
 
 /** Structure to define the configuration attributes for each Input queue.
  *  Applicable to all Octeon processors
  **/
 struct octeon_iq_config {
 #ifdef __BIG_ENDIAN_BITFIELD
-       u64 reserved:32;
+       u64 reserved:16;
+
+       /** Tx interrupt packets. Applicable to 23xx only */
+       u64 iq_intr_pkt:16;
 
        /** Minimum ticks to wait before checking for pending instructions. */
        u64 db_timeout:16;
@@ -192,7 +228,10 @@ struct octeon_iq_config {
        /** Minimum ticks to wait before checking for pending instructions. */
        u64 db_timeout:16;
 
-       u64 reserved:32;
+       /** Tx interrupt packets. Applicable to 23xx only */
+       u64 iq_intr_pkt:16;
+
+       u64 reserved:16;
 #endif
 };
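
As with the other bitfield structs in this header, the two halves of the #ifdef list the fields in mirrored order so each field occupies the same bits of the u64 on big- and little-endian hosts; iq_intr_pkt is carved out of what used to be a 32-bit reserved area. The pattern in isolation (a made-up struct, not from the driver):

    struct demo_bits {
    #ifdef __BIG_ENDIAN_BITFIELD
            u64 hi:16;      /* bits 63..48 */
            u64 mid:32;     /* bits 47..16 */
            u64 lo:16;      /* bits 15..0  */
    #else                   /* little endian: same bits, reversed order */
            u64 lo:16;
            u64 mid:32;
            u64 hi:16;
    #endif
    };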
 
@@ -416,11 +455,15 @@ struct octeon_config {
 #define DISPATCH_LIST_SIZE                      BIT(OPCODE_MASK_BITS)
 
 /* Maximum number of Octeon Instruction (command) queues */
-#define MAX_OCTEON_INSTR_QUEUES(oct)         CN6XXX_MAX_INPUT_QUEUES
-/* Maximum number of Octeon Output queues */
-#define MAX_OCTEON_OUTPUT_QUEUES(oct)         CN6XXX_MAX_OUTPUT_QUEUES
+#define MAX_OCTEON_INSTR_QUEUES(oct)           \
+               (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_INPUT_QUEUES : \
+                                       CN6XXX_MAX_INPUT_QUEUES)
 
-#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES       CN6XXX_MAX_INPUT_QUEUES
-#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES      CN6XXX_MAX_OUTPUT_QUEUES
+/* Maximum number of Octeon Output queues */
+#define MAX_OCTEON_OUTPUT_QUEUES(oct)          \
+               (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_OUTPUT_QUEUES : \
+                                       CN6XXX_MAX_OUTPUT_QUEUES)
 
+#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES       CN23XX_MAX_INPUT_QUEUES
+#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES      CN23XX_MAX_OUTPUT_QUEUES
 #endif /* __OCTEON_CONFIG_H__  */
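
Note the asymmetry above: MAX_OCTEON_*_QUEUES(oct) now selects the per-chip bound at run time, while MAX_POSSIBLE_* must stay a compile-time constant (bumped to the larger CN23XX values) because it sizes static arrays. Illustrative use, with hypothetical names:

    struct demo_device {
            /* sized for the largest supported chip at build time */
            struct octeon_instr_queue *iq[MAX_POSSIBLE_OCTEON_INSTR_QUEUES];
    };

    static void demo_setup_all(struct octeon_device *oct)
    {
            u32 q;

            /* but only iterated up to the probed chip's real maximum */
            for (q = 0; q < MAX_OCTEON_INSTR_QUEUES(oct); q++)
                    demo_setup_queue(oct, q);       /* hypothetical helper */
    }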
index bbb50ea..01a50f3 100644 (file)
  */
 #include <linux/pci.h>
 #include <linux/netdevice.h>
+#include <linux/crc32.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_main.h"
+#include "liquidio_image.h"
 #include "octeon_mem_ops.h"
 
 static void octeon_remote_lock(void);
@@ -40,6 +41,10 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
                                             u32 flags);
 static int octeon_console_read(struct octeon_device *oct, u32 console_num,
                               char *buffer, u32 buf_size);
+static u32 console_bitmask;
+module_param(console_bitmask, int, 0644);
+MODULE_PARM_DESC(console_bitmask,
+                "Bitmask indicating which consoles have debug output redirected to syslog.");
 
 #define MIN(a, b) min((a), (b))
 #define CAST_ULL(v) ((u64)(v))
@@ -177,6 +182,15 @@ struct octeon_pci_console_desc {
        __cvmx_bootmem_desc_get(oct, addr,                               \
                offsetof(struct cvmx_bootmem_named_block_desc, field),   \
                SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
+/**
+ * \brief determines if a given console has debug enabled.
+ * @param console console to check
+ * @returns  1 = enabled. 0 otherwise
+ */
+static int octeon_console_debug_enabled(u32 console)
+{
+       return (console_bitmask >> (console)) & 0x1;
+}
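
console_bitmask is a plain bit-per-console gate: loading the module with console_bitmask=3 would redirect consoles 0 and 1 to syslog. A hypothetical call site, gating per-console output on the parameter:

    if (octeon_console_debug_enabled(console_num))
            dev_info(&oct->pci_dev->dev, "console %u: %s\n",
                     console_num, line);    /* 'line' is a hypothetical buffer */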
 
 /**
  * This function is the implementation of the get macros defined
@@ -709,3 +723,104 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num,
 
        return bytes_to_read;
 }
+
+#define FBUF_SIZE      (4 * 1024 * 1024)
+u8 fbuf[FBUF_SIZE];
+
+int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
+                            size_t size)
+{
+       int ret = 0;
+       u8 *p = fbuf;
+       u32 crc32_result;
+       u64 load_addr;
+       u32 image_len;
+       struct octeon_firmware_file_header *h;
+       u32 i, rem;
+
+       if (size < sizeof(struct octeon_firmware_file_header)) {
+               dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n",
+                       (u32)size,
+                       (u32)sizeof(struct octeon_firmware_file_header));
+               return -EINVAL;
+       }
+
+       h = (struct octeon_firmware_file_header *)data;
+
+       if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) {
+               dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n");
+               return -EINVAL;
+       }
+
+       crc32_result = crc32((unsigned int)~0, data,
+                            sizeof(struct octeon_firmware_file_header) -
+                            sizeof(u32)) ^ ~0U;
+       if (crc32_result != be32_to_cpu(h->crc32)) {
+               dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n",
+                       crc32_result, be32_to_cpu(h->crc32));
+               return -EINVAL;
+       }
+
+       if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) {
+               dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n",
+                       LIQUIDIO_PACKAGE, h->version);
+               return -EINVAL;
+       }
+
+       if (memcmp(LIQUIDIO_BASE_VERSION, h->version + strlen(LIQUIDIO_PACKAGE),
+                  strlen(LIQUIDIO_BASE_VERSION))) {
+               dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n",
+                       LIQUIDIO_BASE_VERSION,
+                       h->version + strlen(LIQUIDIO_PACKAGE));
+               return -EINVAL;
+       }
+
+       if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) {
+               dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n",
+                       be32_to_cpu(h->num_images));
+               return -EINVAL;
+       }
+
+       dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version);
+       snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s",
+                h->version);
+
+       data += sizeof(struct octeon_firmware_file_header);
+
+       dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__,
+                be32_to_cpu(h->num_images));
+       /* load all images */
+       for (i = 0; i < be32_to_cpu(h->num_images); i++) {
+               load_addr = be64_to_cpu(h->desc[i].addr);
+               image_len = be32_to_cpu(h->desc[i].len);
+
+               dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n",
+                        image_len, load_addr);
+
+               /* Write in 4MB chunks */
+               rem = image_len;
+
+               while (rem) {
+                       if (rem < FBUF_SIZE)
+                               size = rem;
+                       else
+                               size = FBUF_SIZE;
+
+                       memcpy(p, data, size);
+
+                       /* download the image */
+                       octeon_pci_write_core_mem(oct, load_addr, p, (u32)size);
+
+                       data += size;
+                       rem -= (u32)size;
+                       load_addr += size;
+               }
+       }
+       dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n",
+                h->bootcmd);
+
+       /* Invoke the bootcmd */
+       ret = octeon_console_send_cmd(oct, h->bootcmd, 50);
+
+       return 0;
+}
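
The CRC check above covers the whole header except its trailing crc32 field; the kernel's crc32() takes a raw seed, so seeding with ~0 and xoring with ~0U afterwards makes it equivalent to the self-conditioning zlib crc32(). A userspace sketch that validates a firmware header the same way (it assumes, as the kernel code implies, that the big-endian CRC sits in the header's last four bytes):

    #include <stddef.h>
    #include <stdint.h>
    #include <zlib.h>           /* link with -lz */

    static uint32_t be32(const uint8_t *p)
    {
            return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                   ((uint32_t)p[2] << 8) | p[3];
    }

    /* returns 1 if the CRC over hdr_len - 4 bytes matches the stored CRC */
    static int fw_header_crc_ok(const uint8_t *hdr, size_t hdr_len)
    {
            uint32_t got = crc32(0L, hdr, hdr_len - sizeof(uint32_t));

            return got == be32(hdr + hdr_len - sizeof(uint32_t));
    }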
index 0eb504a..586b688 100644 (file)
@@ -20,7 +20,6 @@
 * Contact Cavium, Inc. for more information
 **********************************************************************/
 #include <linux/pci.h>
-#include <linux/crc32.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
 #include "liquidio_common.h"
@@ -32,8 +31,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
+#include "cn23xx_pf_device.h"
 
 /** Default configuration
  *  for CN66XX OCTEON Models.
@@ -420,6 +418,108 @@ static struct octeon_config default_cn68xx_210nv_conf = {
        ,
 };
 
+static struct octeon_config default_cn23xx_conf = {
+       .card_type                              = LIO_23XX,
+       .card_name                              = LIO_23XX_NAME,
+       /** IQ attributes */
+       .iq = {
+               .max_iqs                = CN23XX_CFG_IO_QUEUES,
+               .pending_list_size      = (CN23XX_MAX_IQ_DESCRIPTORS *
+                                          CN23XX_CFG_IO_QUEUES),
+               .instr_type             = OCTEON_64BYTE_INSTR,
+               .db_min                 = CN23XX_DB_MIN,
+               .db_timeout             = CN23XX_DB_TIMEOUT,
+               .iq_intr_pkt            = CN23XX_DEF_IQ_INTR_THRESHOLD,
+       },
+
+       /** OQ attributes */
+       .oq = {
+               .max_oqs                = CN23XX_CFG_IO_QUEUES,
+               .info_ptr               = OCTEON_OQ_INFOPTR_MODE,
+               .pkts_per_intr  = CN23XX_OQ_PKTSPER_INTR,
+               .refill_threshold       = CN23XX_OQ_REFIL_THRESHOLD,
+               .oq_intr_pkt    = CN23XX_OQ_INTR_PKT,
+               .oq_intr_time   = CN23XX_OQ_INTR_TIME,
+       },
+
+       .num_nic_ports                          = DEFAULT_NUM_NIC_PORTS_23XX,
+       .num_def_rx_descs                       = CN23XX_MAX_OQ_DESCRIPTORS,
+       .num_def_tx_descs                       = CN23XX_MAX_IQ_DESCRIPTORS,
+       .def_rx_buf_size                        = CN23XX_OQ_BUF_SIZE,
+
+       /* For ethernet interface 0:  Port cfg Attributes */
+       .nic_if_cfg[0] = {
+               /* Max Txqs: half for each of the two ports: max_iq/2 */
+               .max_txqs                       = MAX_TXQS_PER_INTF,
+
+               /* Actual configured value. Range could be: 1...max_txqs */
+               .num_txqs                       = DEF_TXQS_PER_INTF,
+
+               /* Max Rxqs: half for each of the two ports: max_oq/2 */
+               .max_rxqs                       = MAX_RXQS_PER_INTF,
+
+               /* Actual configured value. Range could be: 1...max_rxqs */
+               .num_rxqs                       = DEF_RXQS_PER_INTF,
+
+               /* Num of desc for rx rings */
+               .num_rx_descs                   = CN23XX_MAX_OQ_DESCRIPTORS,
+
+               /* Num of desc for tx rings */
+               .num_tx_descs                   = CN23XX_MAX_IQ_DESCRIPTORS,
+
+               /* SKB size; we need not change buf size even for jumbo frames.
+                * Octeon can send jumbo frames in 4 consecutive descriptors.
+                */
+               .rx_buf_size                    = CN23XX_OQ_BUF_SIZE,
+
+               .base_queue                     = BASE_QUEUE_NOT_REQUESTED,
+
+               .gmx_port_id                    = 0,
+       },
+
+       .nic_if_cfg[1] = {
+               /* Max Txqs: half for each of the two ports: max_iq/2 */
+               .max_txqs                       = MAX_TXQS_PER_INTF,
+
+               /* Actual configured value. Range could be: 1...max_txqs */
+               .num_txqs                       = DEF_TXQS_PER_INTF,
+
+               /* Max Rxqs: half for each of the two ports: max_oq/2 */
+               .max_rxqs                       = MAX_RXQS_PER_INTF,
+
+               /* Actual configured value. Range could be: 1...max_rxqs */
+               .num_rxqs                       = DEF_RXQS_PER_INTF,
+
+               /* Num of desc for rx rings */
+               .num_rx_descs                   = CN23XX_MAX_OQ_DESCRIPTORS,
+
+               /* Num of desc for tx rings */
+               .num_tx_descs                   = CN23XX_MAX_IQ_DESCRIPTORS,
+
+               /* SKB size; we need not change buf size even for jumbo frames.
+                * Octeon can send jumbo frames in 4 consecutive descriptors.
+                */
+               .rx_buf_size                    = CN23XX_OQ_BUF_SIZE,
+
+               .base_queue                     = BASE_QUEUE_NOT_REQUESTED,
+
+               .gmx_port_id                    = 1,
+       },
+
+       .misc                                   = {
+               /* Host driver link query interval */
+               .oct_link_query_interval        = 100,
+
+               /* Octeon link query interval */
+               .host_link_query_interval       = 500,
+
+               .enable_sli_oq_bp               = 0,
+
+               /* Control queue group */
+               .ctrlq_grp                      = 1,
+       }
+};
+
 enum {
        OCTEON_CONFIG_TYPE_DEFAULT = 0,
        NUM_OCTEON_CONFS,
@@ -487,6 +587,8 @@ static void *__retrieve_octeon_config_info(struct octeon_device *oct,
                } else if ((oct->chip_id == OCTEON_CN68XX) &&
                           (card_type == LIO_410NV)) {
                        ret =  (void *)&default_cn68xx_conf;
+               } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+                       ret =  (void *)&default_cn23xx_conf;
                }
                break;
        default:
@@ -501,7 +603,8 @@ static int __verify_octeon_config_info(struct octeon_device *oct, void *conf)
        case OCTEON_CN66XX:
        case OCTEON_CN68XX:
                return lio_validate_cn6xxx_config_info(oct, conf);
-
+       case OCTEON_CN23XX_PF_VID:
+               return 0;
        default:
                break;
        }
@@ -541,107 +644,6 @@ static char *get_oct_app_string(u32 app_mode)
        return oct_dev_app_str[CVM_DRV_INVALID_APP - CVM_DRV_APP_START];
 }
 
-u8 fbuf[4 * 1024 * 1024];
-
-int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
-                            size_t size)
-{
-       int ret = 0;
-       u8 *p = fbuf;
-       u32 crc32_result;
-       u64 load_addr;
-       u32 image_len;
-       struct octeon_firmware_file_header *h;
-       u32 i, rem, base_len = strlen(LIQUIDIO_BASE_VERSION);
-       char *base;
-
-       if (size < sizeof(struct octeon_firmware_file_header)) {
-               dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n",
-                       (u32)size,
-                       (u32)sizeof(struct octeon_firmware_file_header));
-               return -EINVAL;
-       }
-
-       h = (struct octeon_firmware_file_header *)data;
-
-       if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) {
-               dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n");
-               return -EINVAL;
-       }
-
-       crc32_result = crc32((unsigned int)~0, data,
-                            sizeof(struct octeon_firmware_file_header) -
-                            sizeof(u32)) ^ ~0U;
-       if (crc32_result != be32_to_cpu(h->crc32)) {
-               dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n",
-                       crc32_result, be32_to_cpu(h->crc32));
-               return -EINVAL;
-       }
-
-       if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) {
-               dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n",
-                       LIQUIDIO_PACKAGE, h->version);
-               return -EINVAL;
-       }
-
-       base = h->version + strlen(LIQUIDIO_PACKAGE);
-       ret = memcmp(LIQUIDIO_BASE_VERSION, base, base_len);
-       if (ret) {
-               dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n",
-                       LIQUIDIO_BASE_VERSION, base);
-               return -EINVAL;
-       }
-
-       if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) {
-               dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n",
-                       be32_to_cpu(h->num_images));
-               return -EINVAL;
-       }
-
-       dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version);
-       snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s",
-                h->version);
-
-       data += sizeof(struct octeon_firmware_file_header);
-
-       dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__,
-                be32_to_cpu(h->num_images));
-       /* load all images */
-       for (i = 0; i < be32_to_cpu(h->num_images); i++) {
-               load_addr = be64_to_cpu(h->desc[i].addr);
-               image_len = be32_to_cpu(h->desc[i].len);
-
-               dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n",
-                        image_len, load_addr);
-
-               /* Write in 4MB chunks*/
-               rem = image_len;
-
-               while (rem) {
-                       if (rem < (4 * 1024 * 1024))
-                               size = rem;
-                       else
-                               size = 4 * 1024 * 1024;
-
-                       memcpy(p, data, size);
-
-                       /* download the image */
-                       octeon_pci_write_core_mem(oct, load_addr, p, (u32)size);
-
-                       data += size;
-                       rem -= (u32)size;
-                       load_addr += size;
-               }
-       }
-       dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n",
-                h->bootcmd);
-
-       /* Invoke the bootcmd */
-       ret = octeon_console_send_cmd(oct, h->bootcmd, 50);
-
-       return 0;
-}
-
 void octeon_free_device_mem(struct octeon_device *oct)
 {
        int i;
@@ -676,6 +678,9 @@ static struct octeon_device *octeon_allocate_device_mem(u32 pci_id,
                configsize = sizeof(struct octeon_cn6xxx);
                break;
 
+       case OCTEON_CN23XX_PF_VID:
+               configsize = sizeof(struct octeon_cn23xx_pf);
+               break;
        default:
                pr_err("%s: Unknown PCI Device: 0x%x\n",
                       __func__,
@@ -741,6 +746,45 @@ struct octeon_device *octeon_allocate_device(u32 pci_id,
        return oct;
 }
 
+int
+octeon_allocate_ioq_vector(struct octeon_device  *oct)
+{
+       int i, num_ioqs = 0;
+       struct octeon_ioq_vector *ioq_vector;
+       int cpu_num;
+       int size;
+
+       if (OCTEON_CN23XX_PF(oct))
+               num_ioqs = oct->sriov_info.num_pf_rings;
+       size = sizeof(struct octeon_ioq_vector) * num_ioqs;
+
+       oct->ioq_vector = vmalloc(size);
+       if (!oct->ioq_vector)
+               return 1;
+       memset(oct->ioq_vector, 0, size);
+       for (i = 0; i < num_ioqs; i++) {
+               ioq_vector              = &oct->ioq_vector[i];
+               ioq_vector->oct_dev     = oct;
+               ioq_vector->iq_index    = i;
+               ioq_vector->droq_index  = i;
+
+               cpu_num = i % num_online_cpus();
+               cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask);
+
+               if (oct->chip_id == OCTEON_CN23XX_PF_VID)
+                       ioq_vector->ioq_num     = i + oct->sriov_info.pf_srn;
+               else
+                       ioq_vector->ioq_num     = i;
+       }
+       return 0;
+}
+
+void
+octeon_free_ioq_vector(struct octeon_device *oct)
+{
+       vfree(oct->ioq_vector);
+}
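
The allocation loop above pins ring i's affinity mask to CPU (i % num_online_cpus()), spreading IOQ interrupts round-robin across online cores, and on the CN23XX PF offsets the ring number by the PF's starting ring (pf_srn). The distribution on its own (illustrative helper; note also that the vmalloc()+memset() pair is the open-coded form of vzalloc()):

    #include <linux/cpumask.h>

    static void demo_spread(struct cpumask *masks, int num_rings)
    {
            int i;

            for (i = 0; i < num_rings; i++)
                    cpumask_set_cpu(i % num_online_cpus(), &masks[i]);
    }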
+
 /* this function is only for setting up the first queue */
 int octeon_setup_instr_queues(struct octeon_device *oct)
 {
@@ -749,10 +793,12 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
        union oct_txpciq txpciq;
        int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
-       /* this causes queue 0 to be default queue */
        if (OCTEON_CN6XXX(oct))
                num_descs =
                        CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+       else if (OCTEON_CN23XX_PF(oct))
+               num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
+                                                               conf));
 
        oct->num_iqs = 0;
 
@@ -769,6 +815,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
        oct->instr_queue[0]->ifidx = 0;
        txpciq.u64 = 0;
        txpciq.s.q_no = iq_no;
+       txpciq.s.pkind = oct->pfvf_hsword.pkind;
        txpciq.s.use_qpg = 0;
        txpciq.s.qpg = 0;
        if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
@@ -788,14 +835,17 @@ int octeon_setup_output_queues(struct octeon_device *oct)
        u32 oq_no = 0;
        int numa_node = cpu_to_node(oq_no % num_online_cpus());
 
-       /* this causes queue 0 to be default queue */
        if (OCTEON_CN6XXX(oct)) {
                num_descs =
                        CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
                desc_size =
                        CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf));
+       } else if (OCTEON_CN23XX_PF(oct)) {
+               num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
+                                                               conf));
+               desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf,
+                                                              conf));
        }
-
        oct->num_oqs = 0;
        oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
        if (!oct->droq[0])
@@ -812,10 +862,10 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 
 void octeon_set_io_queues_off(struct octeon_device *oct)
 {
-       /* Disable the i/p and o/p queues for this Octeon. */
-
-       octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
-       octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+       if (OCTEON_CN6XXX(oct)) {
+               octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
+               octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+       }
 }
 
 void octeon_set_droq_pkt_op(struct octeon_device *oct,
@@ -825,14 +875,16 @@ void octeon_set_droq_pkt_op(struct octeon_device *oct,
        u32 reg_val = 0;
 
        /* Disable the i/p and o/p queues for this Octeon. */
-       reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
+       if (OCTEON_CN6XXX(oct)) {
+               reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
 
-       if (enable)
-               reg_val = reg_val | (1 << q_no);
-       else
-               reg_val = reg_val & (~(1 << q_no));
+               if (enable)
+                       reg_val = reg_val | (1 << q_no);
+               else
+                       reg_val = reg_val & (~(1 << q_no));
 
-       octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+               octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+       }
 }
 
 int octeon_init_dispatch_list(struct octeon_device *oct)
@@ -1019,6 +1071,9 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
        if (OCTEON_CN6XXX(oct))
                num_nic_ports =
                        CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf));
+       else if (OCTEON_CN23XX_PF(oct))
+               num_nic_ports =
+                       CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf));
 
        if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) {
                dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n",
@@ -1046,6 +1101,12 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
        }
        oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags;
        oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+       oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+
+       oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind;
+
+       for (i = 0; i < oct->num_iqs; i++)
+               oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind;
 
        atomic_set(&oct->status, OCT_DEV_CORE_OK);
 
@@ -1108,8 +1169,10 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct)
        if (OCTEON_CN6XXX(oct)) {
                default_oct_conf =
                        (struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf));
+       } else if (OCTEON_CN23XX_PF(oct)) {
+               default_oct_conf = (struct octeon_config *)
+                       (CHIP_FIELD(oct, cn23xx_pf, conf));
        }
-
        return default_oct_conf;
 }
 
@@ -1141,7 +1204,9 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr)
         * So write MSB first
         */
        addrhi = (addr >> 32);
-       if ((oct->chip_id == OCTEON_CN66XX) || (oct->chip_id == OCTEON_CN68XX))
+       if ((oct->chip_id == OCTEON_CN66XX) ||
+           (oct->chip_id == OCTEON_CN68XX) ||
+           (oct->chip_id == OCTEON_CN23XX_PF_VID))
                addrhi |= 0x00060000;
        writel(addrhi, oct->reg_list.pci_win_rd_addr_hi);
 
@@ -1185,8 +1250,15 @@ int octeon_mem_access_ok(struct octeon_device *oct)
        u64 lmc0_reset_ctl;
 
        /* Check to make sure a DDR interface is enabled */
-       lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL);
-       access_okay = (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK);
+       if (OCTEON_CN23XX_PF(oct)) {
+               lmc0_reset_ctl = lio_pci_readq(oct, CN23XX_LMC0_RESET_CTL);
+               access_okay =
+                       (lmc0_reset_ctl & CN23XX_LMC0_RESET_CTL_DDR3RST_MASK);
+       } else {
+               lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL);
+               access_okay =
+                       (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK);
+       }
 
        return access_okay ? 0 : 1;
 }
@@ -1226,3 +1298,39 @@ int lio_get_device_id(void *dev)
                        return octeon_dev->octeon_id;
        return -1;
 }
+
+void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
+{
+       u64 instr_cnt;
+       struct octeon_device *oct = NULL;
+
+       /* The whole sequence should ideally be atomic. */
+       if (droq) {
+               spin_lock_bh(&droq->lock);
+               writel(droq->pkt_count, droq->pkts_sent_reg);
+               droq->pkt_count = 0;
+               spin_unlock_bh(&droq->lock);
+               oct = droq->oct_dev;
+       }
+       if (iq) {
+               spin_lock_bh(&iq->lock);
+               writel(iq->pkt_in_done, iq->inst_cnt_reg);
+               iq->pkt_in_done = 0;
+               spin_unlock_bh(&iq->lock);
+               oct = iq->oct_dev;
+       }
+       /* Write RESEND. Writing RESEND in SLI_PKTX_CNTS should be enough
+        * to trigger Tx interrupts as well, if they are pending.
+        */
+       if (oct && OCTEON_CN23XX_PF(oct)) {
+               if (droq)
+                       writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
+               /* We race with firmware here; read and write the IN_DONE_CNTS. */
+               else if (iq) {
+                       instr_cnt =  readq(iq->inst_cnt_reg);
+                       writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) |
+                               CN23XX_INTR_RESEND),
+                              iq->inst_cnt_reg);
+               }
+       }
+}
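A minimal sketch of how a NAPI completion path might use this helper once
its budget is not exhausted; the queue names here are assumptions, not
taken from this patch:

	/* Sketch only: flush pending counts and re-arm interrupts. */
	if (work_done < budget) {
		napi_complete(napi);
		lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
	}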
index 01edfb4..da15c2a 100644 (file)
 /** PCI VendorId Device Id */
 #define  OCTEON_CN68XX_PCIID          0x91177d
 #define  OCTEON_CN66XX_PCIID          0x92177d
-
+#define  OCTEON_CN23XX_PCIID_PF       0x9702177d
 /** Driver identifies chips by these IDs, created by combining the
  *  DeviceId and RevisionId; where the revision ID is not needed to
  *  distinguish between chips, a revision ID of 0 is used.
  */
 #define  OCTEON_CN68XX                0x0091
 #define  OCTEON_CN66XX                0x0092
+#define  OCTEON_CN23XX_PF_VID         0x9702
+
+/** Revision IDs for the chips */
+#define  OCTEON_CN23XX_REV_1_0        0x00
+#define  OCTEON_CN23XX_REV_1_1        0x01
+#define  OCTEON_CN23XX_REV_2_0        0x80
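The revision IDs above lend themselves to a small decode helper; the
function below is illustrative only and not part of this patch:

	static const char *cn23xx_rev_str(u16 rev_id)
	{
		switch (rev_id) {
		case OCTEON_CN23XX_REV_1_0: return "1.0";
		case OCTEON_CN23XX_REV_1_1: return "1.1";
		case OCTEON_CN23XX_REV_2_0: return "2.0";
		default:                    return "unknown";
		}
	}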
 
 /** Endian-swap modes supported by Octeon. */
 enum octeon_pci_swap_mode {
@@ -46,6 +52,9 @@ enum octeon_pci_swap_mode {
        OCTEON_PCI_32BIT_LW_SWAP = 3
 };
 
+#define  OCTEON_OUTPUT_INTR   (2)
+#define  OCTEON_ALL_INTR      0xff
+
 /*---------------   PCI BAR1 index registers -------------*/
 
 /* BAR1 Mask */
@@ -198,9 +207,9 @@ struct octeon_fn_list {
        void (*setup_oq_regs)(struct octeon_device *, u32);
 
        irqreturn_t (*process_interrupt_regs)(void *);
+       u64 (*msix_interrupt_handler)(void *);
        int (*soft_reset)(struct octeon_device *);
        int (*setup_device_regs)(struct octeon_device *);
-       void (*reinit_regs)(struct octeon_device *);
        void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int);
        void (*bar1_idx_write)(struct octeon_device *, u32, u32);
        u32 (*bar1_idx_read)(struct octeon_device *, u32);
@@ -209,10 +218,10 @@ struct octeon_fn_list {
        void (*enable_oq_pkt_time_intr)(struct octeon_device *, u32);
        void (*disable_oq_pkt_time_intr)(struct octeon_device *, u32);
 
-       void (*enable_interrupt)(void *);
-       void (*disable_interrupt)(void *);
+       void (*enable_interrupt)(struct octeon_device *, u8);
+       void (*disable_interrupt)(struct octeon_device *, u8);
 
-       void (*enable_io_queues)(struct octeon_device *);
+       int (*enable_io_queues)(struct octeon_device *);
        void (*disable_io_queues)(struct octeon_device *);
 };
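With the enable/disable hooks now taking an interrupt-type flag, callers
can re-arm a subset of interrupts; a sketch using the flags defined above
(illustrative, not from this patch):

	/* Re-enable just the output-queue interrupts... */
	oct->fn_list.enable_interrupt(oct, OCTEON_OUTPUT_INTR);
	/* ...or everything at once. */
	oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);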
 
@@ -266,11 +275,72 @@ struct octdev_props {
        /* Each interface in the Octeon device has a network
         * device pointer (used for OS specific calls).
         */
+       int    rx_on;
        int    napi_enabled;
        int    gmxport;
        struct net_device *netdev;
 };
 
+#define LIO_FLAG_MSIX_ENABLED  0x1
+#define MSIX_PO_INT            0x1
+#define MSIX_PI_INT            0x2
+
+struct octeon_pf_vf_hs_word {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+       /** PKIND value assigned for the DPI interface */
+       u64        pkind : 8;
+
+       /** OCTEON core clock multiplier   */
+       u64        core_tics_per_us : 16;
+
+       /** OCTEON coprocessor clock multiplier  */
+       u64        coproc_tics_per_us : 16;
+
+       /** app that is currently running on OCTEON */
+       u64        app_mode : 8;
+
+       /** RESERVED */
+       u64 reserved : 16;
+
+#else
+
+       /** RESERVED */
+       u64 reserved : 16;
+
+       /** app that is currently running on OCTEON */
+       u64        app_mode : 8;
+
+       /** OCTEON coprocessor clock multiplier  */
+       u64        coproc_tics_per_us : 16;
+
+       /** OCTEON core clock multiplier   */
+       u64        core_tics_per_us : 16;
+
+       /** PKIND value assigned for the DPI interface */
+       u64        pkind : 8;
+#endif
+};
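Because this word is exchanged between PF and VF, its bitfields must pack
into exactly one u64 (8 + 16 + 16 + 8 + 16 = 64 bits); an illustrative
compile-time check that could sit in any init path, not part of this patch:

	BUILD_BUG_ON(sizeof(struct octeon_pf_vf_hs_word) != sizeof(u64));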
+
+struct octeon_sriov_info {
+       /* Actual rings left for PF device */
+       u32     num_pf_rings;
+
+       /* Starting ring number (SRN) of the PF's usable IO queues */
+       u32     pf_srn;
+
+       /* Total PF rings */
+       u32     trs;
+};
+
+struct octeon_ioq_vector {
+       struct octeon_device   *oct_dev;
+       int                     iq_index;
+       int                     droq_index;
+       int                     vector;
+       struct cpumask          affinity_mask;
+       u32                     ioq_num;
+};
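A sketch of what the octeon_allocate_ioq_vector() helper declared later in
this header might do — allocate one vector descriptor per IO queue and tie
it back to the device; the body below is assumed, not taken from this patch:

	int octeon_allocate_ioq_vector(struct octeon_device *oct)
	{
		int i;

		oct->ioq_vector = vzalloc(oct->num_iqs *
					  sizeof(struct octeon_ioq_vector));
		if (!oct->ioq_vector)
			return 1;

		for (i = 0; i < oct->num_iqs; i++) {
			oct->ioq_vector[i].oct_dev = oct;
			oct->ioq_vector[i].iq_index = i;
			oct->ioq_vector[i].droq_index = i;
		}
		return 0;
	}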
+
 /** The Octeon device.
  *  Each Octeon device has this structure to represent all its
  *  components.
@@ -296,7 +366,7 @@ struct octeon_device {
        /** Octeon Chip type. */
        u16 chip_id;
        u16 rev_id;
-
+       u16 pf_num;
        /** This device's id - set by the driver. */
        u32 octeon_id;
 
@@ -305,7 +375,6 @@ struct octeon_device {
 
        u16 flags;
 #define LIO_FLAG_MSI_ENABLED                  (u32)(1 << 1)
-#define LIO_FLAG_MSIX_ENABLED                 (u32)(1 << 2)
 
        /** The state of this device */
        atomic_t status;
@@ -395,6 +464,19 @@ struct octeon_device {
 
        void *priv;
 
+       int num_msix_irqs;
+
+       void *msix_entries;
+
+       struct octeon_sriov_info sriov_info;
+
+       struct octeon_pf_vf_hs_word pfvf_hsword;
+
+       int msix_on;
+
+       /** IOq information for its corresponding MSI-X interrupt. */
+       struct octeon_ioq_vector    *ioq_vector;
+
        int rx_pause;
        int tx_pause;
 
@@ -402,12 +484,15 @@ struct octeon_device {
 
        /* private flags to control driver-specific features through ethtool */
        u32 priv_flags;
+
+       void *watchdog_task;
 };
 
 #define  OCT_DRV_ONLINE 1
 #define  OCT_DRV_OFFLINE 2
 #define  OCTEON_CN6XXX(oct)           ((oct->chip_id == OCTEON_CN66XX) || \
                                       (oct->chip_id == OCTEON_CN68XX))
+#define  OCTEON_CN23XX_PF(oct)        (oct->chip_id == OCTEON_CN23XX_PF_VID)
 #define CHIP_FIELD(oct, TYPE, field)             \
        (((struct octeon_ ## TYPE  *)(oct->chip))->field)
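CHIP_FIELD() is how the rest of this patch reaches chip-specific state
without naming the concrete chip structure; for example (as used in the
droq setup code further below):

	struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);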
 
@@ -661,13 +746,24 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type);
  */
 struct octeon_config *octeon_get_conf(struct octeon_device *oct);
 
+void octeon_free_ioq_vector(struct octeon_device *oct);
+int octeon_allocate_ioq_vector(struct octeon_device  *oct);
+void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq);
+
 /* LiquidIO driver private flags */
 enum {
        OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */
 };
 
-static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag,
-                                    u32 val)
+#define OCT_PRIV_FLAG_DEFAULT 0x0
+
+static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag)
+{
+       return !!(octdev->priv_flags & (0x1 << flag));
+}
+
+static inline void lio_set_priv_flag(struct octeon_device *octdev,
+                                    u32 flag, u32 val)
 {
        if (val)
                octdev->priv_flags |= (0x1 << flag);
index e0afe4c..f60e532 100644 (file)
@@ -31,6 +31,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 #define     CVM_MIN(d1, d2)           (((d1) < (d2)) ? (d1) : (d2))
 #define     CVM_MAX(d1, d2)           (((d1) > (d2)) ? (d1) : (d2))
@@ -92,22 +93,25 @@ static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev,
        return fn_arg;
 }
 
-/** Check for packets on Droq. This function should be called with
- * lock held.
+/** Check for packets on Droq. This function should be called with lock held.
  *  @param  droq - Droq on which count is checked.
  *  @return Returns packet count.
  */
 u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq)
 {
        u32 pkt_count = 0;
+       u32 last_count;
 
        pkt_count = readl(droq->pkts_sent_reg);
-       if (pkt_count) {
-               atomic_add(pkt_count, &droq->pkts_pending);
-               writel(pkt_count, droq->pkts_sent_reg);
-       }
 
-       return pkt_count;
+       last_count = pkt_count - droq->pkt_count;
+       droq->pkt_count = pkt_count;
+
+       /* Write to cnts at NAPI IRQ enable or at the end of the droq tasklet */
+       if (last_count)
+               atomic_add(last_count, &droq->pkts_pending);
+
+       return last_count;
 }
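The delta computation relies on unsigned 32-bit wraparound, so no
special-casing is needed when the hardware counter rolls over; a worked
example (values invented):

	/* Previous snapshot 0xFFFFFFF0, new read 0x00000010:
	 * last_count = 0x00000010 - 0xFFFFFFF0 = 0x20, i.e. 32 new
	 * packets, which is correct despite the wrap.
	 */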
 
 static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq)
@@ -259,6 +263,11 @@ int octeon_init_droq(struct octeon_device *oct,
                c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
                c_refill_threshold =
                        (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+       } else if (OCTEON_CN23XX_PF(oct)) {
+               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+               c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+               c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
        } else {
                return 1;
        }
@@ -564,7 +573,7 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct,
                        (unsigned int)rh->r.opcode,
                        (unsigned int)rh->r.subcode);
                droq->stats.dropped_nodispatch++;
-       }                       /* else (dispatch_fn ... */
+       }
 
        return cnt;
 }
@@ -735,16 +744,20 @@ octeon_droq_process_packets(struct octeon_device *oct,
        u32 pkt_count = 0, pkts_processed = 0;
        struct list_head *tmp, *tmp2;
 
+       /* Grab the droq lock */
+       spin_lock(&droq->lock);
+
+       octeon_droq_check_hw_for_pkts(droq);
        pkt_count = atomic_read(&droq->pkts_pending);
-       if (!pkt_count)
+
+       if (!pkt_count) {
+               spin_unlock(&droq->lock);
                return 0;
+       }
 
        if (pkt_count > budget)
                pkt_count = budget;
 
-       /* Grab the droq lock */
-       spin_lock(&droq->lock);
-
        pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count);
 
        atomic_sub(pkts_processed, &droq->pkts_pending);
@@ -789,6 +802,8 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
        spin_lock(&droq->lock);
 
        while (total_pkts_processed < budget) {
+               octeon_droq_check_hw_for_pkts(droq);
+
                pkts_available =
                        CVM_MIN((budget - total_pkts_processed),
                                (u32)(atomic_read(&droq->pkts_pending)));
@@ -803,8 +818,6 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
                atomic_sub(pkts_processed, &droq->pkts_pending);
 
                total_pkts_processed += pkts_processed;
-
-               octeon_droq_check_hw_for_pkts(droq);
        }
 
        spin_unlock(&droq->lock);
@@ -874,8 +887,11 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
                        return 0;
                }
                break;
+               case OCTEON_CN23XX_PF_VID: {
+                       lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+               }
+               break;
                }
-
                return 0;
        }
 
index 5a6fb91..5be002d 100644 (file)
@@ -261,6 +261,8 @@ struct octeon_droq {
 
        u32 q_no;
 
+       u32 pkt_count;
+
        struct octeon_droq_ops ops;
 
        struct octeon_device *oct_dev;
index ff4b1d6..e4d426b 100644 (file)
@@ -88,6 +88,8 @@ struct octeon_instr_queue {
        /** A spinlock to protect while posting on the ring.  */
        spinlock_t post_lock;
 
+       u32 pkt_in_done;
+
        /** A spinlock to protect access to the input ring.*/
        spinlock_t iq_flush_running_lock;
 
index bc14e4c..366298f 100644 (file)
 
 #define DRV_NAME "LiquidIO"
 
-/**
- * \brief determines if a given console has debug enabled.
- * @param console console to check
- * @returns  1 = enabled. 0 otherwise
+/** This structure is used by the NIC driver to store the information
+ * required to free the sk_buff once the packet has been fetched by Octeon.
+ * The byte offsets below assume the worst case of a 64-bit system.
  */
-int octeon_console_debug_enabled(u32 console);
+struct octnet_buf_free_info {
+       /** Bytes 1-8.  Pointer to network device private structure. */
+       struct lio *lio;
+
+       /** Bytes 9-16.  Pointer to sk_buff. */
+       struct sk_buff *skb;
+
+       /** Bytes 17-24.  Pointer to gather list. */
+       struct octnic_gather *g;
+
+       /** Bytes 25-32. Physical address of skb->data or gather list. */
+       u64 dptr;
+
+       /** Bytes 33-47. Piggybacked soft command, if any */
+       struct octeon_soft_command *sc;
+};
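A sketch of the transmit-completion side that consumes this structure; it
assumes the free info was stashed in skb->cb at transmit time and that
lio->oct_dev points at the owning octeon_device — the details are
illustrative, not from this patch:

	static void free_netbuf_sketch(void *buf)
	{
		struct sk_buff *skb = (struct sk_buff *)buf;
		struct octnet_buf_free_info *finfo;

		finfo = (struct octnet_buf_free_info *)skb->cb;
		dma_unmap_single(&finfo->lio->oct_dev->pci_dev->dev,
				 finfo->dptr, skb->len, DMA_TO_DEVICE);
		dev_kfree_skb_any(skb);
	}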
 
 /* BQL-related functions */
 void octeon_report_sent_bytes_to_bql(void *buf, int reqtype);
@@ -167,22 +181,26 @@ cnnic_numa_alloc_aligned_dma(u32 size,
 #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
                free_pages(orig_ptr, get_order(size))
 
-static inline void
+static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
+       int errno = 0;
        wait_queue_t we;
 
        init_waitqueue_entry(&we, current);
        add_wait_queue(wait_queue, &we);
        while (!(READ_ONCE(*condition))) {
                set_current_state(TASK_INTERRUPTIBLE);
-               if (signal_pending(current))
+               if (signal_pending(current)) {
+                       errno = -EINTR;
                        goto out;
+               }
                schedule();
        }
 out:
        set_current_state(TASK_RUNNING);
        remove_wait_queue(wait_queue, &we);
+       return errno;
 }
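With sleep_cond() now returning an errno, callers can tell a signalled
wait apart from a completed one; a sketch (the wait-context names are
hypothetical):

	if (sleep_cond(&ctx->wait_queue, &ctx->condition) == -EINTR) {
		dev_dbg(&oct->pci_dev->dev, "wait interrupted by a signal\n");
		return -EINTR;
	}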
 
 static inline void
index 95a4bbe..0dc081a 100644 (file)
@@ -19,7 +19,6 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
index fb820dc..e5d1deb 100644 (file)
@@ -26,8 +26,6 @@
 
 #ifndef __OCTEON_NETWORK_H__
 #define __OCTEON_NETWORK_H__
-#include <linux/version.h>
-#include <linux/dma-mapping.h>
 #include <linux/ptp_clock_kernel.h>
 
 #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)
@@ -124,11 +122,21 @@ struct lio {
 
        /* work queue for  txq status */
        struct cavium_wq        txq_status_wq;
+
+       /* work queue for  link status */
+       struct cavium_wq        link_status_wq;
+
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
 #define GET_LIO(netdev)  ((struct lio *)netdev_priv(netdev))
 
+#define CIU3_WDOG(c)                 (0x1010000020000ULL + (c << 3))
+#define CIU3_WDOG_MASK               12ULL
+#define LIO_MONITOR_WDOG_EXPIRE      1
+#define LIO_MONITOR_CORE_STUCK_MSGD  2
+#define LIO_MAX_CORES                12
+
 /**
  * \brief Enable or disable feature
  * @param netdev    pointer to network device
index 166727b..40ac1fe 100644 (file)
@@ -19,7 +19,6 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -36,6 +35,7 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
                               u32                     rdatasize)
 {
        struct octeon_soft_command *sc;
+       struct octeon_instr_ih3  *ih3;
        struct octeon_instr_ih2  *ih2;
        struct octeon_instr_irh *irh;
        struct octeon_instr_rdp *rdp;
@@ -52,10 +52,19 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
        /* Add in the response related fields. Opcode and Param are already
         * there.
         */
-       ih2      = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-       rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-       irh     = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-       ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+       if (OCTEON_CN23XX_PF(oct)) {
+               ih3      = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+               rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+               irh     = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+               /* pki_ih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 48 bytes */
+               ih3->fsz = LIO_SOFTCMDRESP_IH3;
+       } else {
+               ih2      = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+               rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+               irh     = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+               /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+               ih2->fsz = LIO_SOFTCMDRESP_IH2;
+       }
 
        irh->rflag = 1; /* a response is required */
 
@@ -64,7 +73,10 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
 
        *sc->status_word = COMPLETION_WORD_INIT;
 
-       sc->cmd.cmd2.rptr =  sc->dmarptr;
+       if (OCTEON_CN23XX_PF(oct))
+               sc->cmd.cmd3.rptr =  sc->dmarptr;
+       else
+               sc->cmd.cmd2.rptr =  sc->dmarptr;
 
        sc->wait_time = 1000;
        sc->timeout = jiffies + sc->wait_time;
@@ -73,12 +85,9 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
 }
 
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-                            struct octnic_data_pkt *ndata,
-                            u32 xmit_more)
+                            struct octnic_data_pkt *ndata)
 {
-       int ring_doorbell;
-
-       ring_doorbell = !xmit_more;
+       int ring_doorbell = 1;
 
        return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd,
                                   ndata->buf, ndata->datasize,
@@ -183,8 +192,8 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct,
        retval = octeon_send_soft_command(oct, sc);
        if (retval == IQ_SEND_FAILED) {
                octeon_free_soft_command(oct, sc);
-               dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n",
-                       __func__, nctrl->ncmd.s.cmd, retval);
+               dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n",
+                       __func__, oct->pf_num, nctrl->ncmd.s.cmd, retval);
                spin_unlock_bh(&oct->cmd_resp_wqlock);
                return -1;
        }
index b71a2bb..4b8da67 100644 (file)
@@ -138,7 +138,7 @@ octnet_prepare_pci_cmd_o2(struct octeon_device *oct,
        /* assume that rflag is cleared so therefore front data will only have
         * irh and ossp[0], ossp[1] for a total of 32 bytes
         */
-       ih2->fsz = 24;
+       ih2->fsz = LIO_PCICMD_O2;
 
        ih2->tagtype = ORDERED_TAG;
        ih2->grp = DEFAULT_POW_GRP;
@@ -196,7 +196,7 @@ octnet_prepare_pci_cmd_o3(struct octeon_device *oct,
         */
        ih3->pkind       = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind;
        /*PKI IH*/
-       ih3->fsz = 24 + 8;
+       ih3->fsz = LIO_PCICMD_O3;
 
        if (!setup->s.gather) {
                ih3->dlengsz = setup->s.u.datasize;
@@ -278,7 +278,7 @@ octeon_alloc_soft_command_resp(struct octeon_device    *oct,
  * queue should be stopped, and IQ_SEND_OK if it sent okay.
  */
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-                            struct octnic_data_pkt *ndata, u32 xmit_more);
+                            struct octnic_data_pkt *ndata);
 
 /** Send a NIC control packet to the device
  * @param oct - octeon device pointer
index d32492f..90866bb 100644 (file)
@@ -30,6 +30,7 @@
 #include "octeon_main.h"
 #include "octeon_network.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
        (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
@@ -71,7 +72,8 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
        if (OCTEON_CN6XXX(oct))
                conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
-
+       else if (OCTEON_CN23XX_PF(oct))
+               conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf)));
        if (!conf) {
                dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
                        oct->chip_id);
@@ -88,6 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
        q_size = (u32)conf->instr_type * num_descs;
 
        iq = oct->instr_queue[iq_no];
+
        iq->oct_dev = oct;
 
        set_dev_node(&oct->pci_dev->dev, numa_node);
@@ -181,6 +184,9 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
        if (OCTEON_CN6XXX(oct))
                desc_size =
                    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
+       else if (OCTEON_CN23XX_PF(oct))
+               desc_size =
+                   CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf));
 
        vfree(iq->request_list);
 
@@ -383,7 +389,12 @@ lio_process_iq_request_list(struct octeon_device *oct,
                case REQTYPE_SOFT_COMMAND:
                        sc = buf;
 
-                       irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+                       if (OCTEON_CN23XX_PF(oct))
+                               irh = (struct octeon_instr_irh *)
+                                       &sc->cmd.cmd3.irh;
+                       else
+                               irh = (struct octeon_instr_irh *)
+                                       &sc->cmd.cmd2.irh;
                        if (irh->rflag) {
                                /* We're expecting a response from Octeon.
                                 * It's up to lio_process_ordered_list() to
@@ -499,6 +510,7 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
 
        if (!oct)
                return;
+
        iq = oct->instr_queue[iq_no];
        if (!iq)
                return;
@@ -514,6 +526,8 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
 
        /* Flush the instruction queue */
        octeon_flush_iq(oct, iq, 1, 0);
+
+       lio_enable_irq(NULL, iq);
 }
 
 /* Called by the Poll thread at regular intervals to check the instruction
@@ -580,6 +594,8 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 {
        struct octeon_config *oct_cfg;
        struct octeon_instr_ih2 *ih2;
+       struct octeon_instr_ih3 *ih3;
+       struct octeon_instr_pki_ih3 *pki_ih3;
        struct octeon_instr_irh *irh;
        struct octeon_instr_rdp *rdp;
 
@@ -588,36 +604,88 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 
        oct_cfg = octeon_get_conf(oct);
 
-       ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-       ih2->tagtype = ATOMIC_TAG;
-       ih2->tag     = LIO_CONTROL;
-       ih2->raw     = 1;
-       ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
-
-       if (sc->datasize) {
-               ih2->dlengsz = sc->datasize;
-               ih2->rs = 1;
-       }
-
-       irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-       irh->opcode    = opcode;
-       irh->subcode   = subcode;
-
-       /* opcode/subcode specific parameters (ossp) */
-       irh->ossp       = irh_ossp;
-       sc->cmd.cmd2.ossp[0] = ossp0;
-       sc->cmd.cmd2.ossp[1] = ossp1;
-
-       if (sc->rdatasize) {
-               rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-               rdp->pcie_port = oct->pcie_port;
-               rdp->rlen      = sc->rdatasize;
+       if (OCTEON_CN23XX_PF(oct)) {
+               ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+
+               ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
+
+               pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
+
+               pki_ih3->w           = 1;
+               pki_ih3->raw         = 1;
+               pki_ih3->utag        = 1;
+               pki_ih3->uqpg        =
+                       oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
+               pki_ih3->utt         = 1;
+               pki_ih3->tag     = LIO_CONTROL;
+               pki_ih3->tagtype = ATOMIC_TAG;
+               pki_ih3->qpg         =
+                       oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
+               pki_ih3->pm          = 0x7;
+               pki_ih3->sl          = 8;
+
+               if (sc->datasize)
+                       ih3->dlengsz = sc->datasize;
+
+               irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+               irh->opcode    = opcode;
+               irh->subcode   = subcode;
+
+               /* opcode/subcode specific parameters (ossp) */
+               irh->ossp       = irh_ossp;
+               sc->cmd.cmd3.ossp[0] = ossp0;
+               sc->cmd.cmd3.ossp[1] = ossp1;
+
+               if (sc->rdatasize) {
+                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+                       rdp->pcie_port = oct->pcie_port;
+                       rdp->rlen      = sc->rdatasize;
+
+                       irh->rflag =  1;
+                       /* PKI IH3 */
+                       /* pki_ih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 48 B */
+                       ih3->fsz    = LIO_SOFTCMDRESP_IH3;
+               } else {
+                       irh->rflag =  0;
+                       /* PKI IH3 */
+                       /* pki_ih3 + irh + ossp[0] + ossp[1] = 32 bytes */
+                       ih3->fsz    = LIO_PCICMD_O3;
+               }
 
-               irh->rflag =  1;
-               ih2->fsz   = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
        } else {
-               irh->rflag =  0;
-               ih2->fsz   = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+               ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+               ih2->tagtype = ATOMIC_TAG;
+               ih2->tag     = LIO_CONTROL;
+               ih2->raw     = 1;
+               ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
+
+               if (sc->datasize) {
+                       ih2->dlengsz = sc->datasize;
+                       ih2->rs = 1;
+               }
+
+               irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+               irh->opcode    = opcode;
+               irh->subcode   = subcode;
+
+               /* opcode/subcode specific parameters (ossp) */
+               irh->ossp       = irh_ossp;
+               sc->cmd.cmd2.ossp[0] = ossp0;
+               sc->cmd.cmd2.ossp[1] = ossp1;
+
+               if (sc->rdatasize) {
+                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+                       rdp->pcie_port = oct->pcie_port;
+                       rdp->rlen      = sc->rdatasize;
+
+                       irh->rflag =  1;
+                       /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+                       ih2->fsz   = LIO_SOFTCMDRESP_IH2;
+               } else {
+                       irh->rflag =  0;
+                       /* irh + ossp[0] + ossp[1] = 24 bytes */
+                       ih2->fsz   = LIO_PCICMD_O2;
+               }
        }
 }
 
@@ -625,23 +693,39 @@ int octeon_send_soft_command(struct octeon_device *oct,
                             struct octeon_soft_command *sc)
 {
        struct octeon_instr_ih2 *ih2;
+       struct octeon_instr_ih3 *ih3;
        struct octeon_instr_irh *irh;
        u32 len;
 
-       ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-       if (ih2->dlengsz) {
-               WARN_ON(!sc->dmadptr);
-               sc->cmd.cmd2.dptr = sc->dmadptr;
-       }
-       irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-       if (irh->rflag) {
-               WARN_ON(!sc->dmarptr);
-               WARN_ON(!sc->status_word);
-               *sc->status_word = COMPLETION_WORD_INIT;
-
-               sc->cmd.cmd2.rptr = sc->dmarptr;
+       if (OCTEON_CN23XX_PF(oct)) {
+               ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+               if (ih3->dlengsz) {
+                       WARN_ON(!sc->dmadptr);
+                       sc->cmd.cmd3.dptr = sc->dmadptr;
+               }
+               irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+               if (irh->rflag) {
+                       WARN_ON(!sc->dmarptr);
+                       WARN_ON(!sc->status_word);
+                       *sc->status_word = COMPLETION_WORD_INIT;
+                       sc->cmd.cmd3.rptr = sc->dmarptr;
+               }
+               len = (u32)ih3->dlengsz;
+       } else {
+               ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+               if (ih2->dlengsz) {
+                       WARN_ON(!sc->dmadptr);
+                       sc->cmd.cmd2.dptr = sc->dmadptr;
+               }
+               irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+               if (irh->rflag) {
+                       WARN_ON(!sc->dmarptr);
+                       WARN_ON(!sc->status_word);
+                       *sc->status_word = COMPLETION_WORD_INIT;
+                       sc->cmd.cmd2.rptr = sc->dmarptr;
+               }
+               len = (u32)ih2->dlengsz;
        }
-       len = (u32)ih2->dlengsz;
 
        if (sc->wait_time)
                sc->timeout = jiffies + sc->wait_time;
index 709049e..be52178 100644 (file)
@@ -91,8 +91,13 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
 
                sc = (struct octeon_soft_command *)ordered_sc_list->
                    head.next;
-               rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-               rptr = sc->cmd.cmd2.rptr;
+               if (OCTEON_CN23XX_PF(octeon_dev)) {
+                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+                       rptr = sc->cmd.cmd3.rptr;
+               } else {
+                       rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+                       rptr = sc->cmd.cmd2.rptr;
+               }
 
                status = OCTEON_REQUEST_PENDING;
 
index dd63f96..18d12d3 100644 (file)
@@ -258,6 +258,7 @@ struct nicvf {
        u8                      sqs_id;
        bool                    sqs_mode;
        bool                    hw_tso;
+       bool                    t88;
 
        /* Receive buffer alloc */
        u32                     rb_page_offset;
index 25618d2..2bbf4cb 100644 (file)
@@ -282,9 +282,14 @@ static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
        u16 sdevid;
        u64 lmac_cfg;
 
-       /* Max value that can be set is 60 */
-       if (size > 60)
-               size = 60;
+       /* There is an issue in HW wherein, while sending GSO-sized
+        * pkts as part of TSO, if the pkt len falls below this size the
+        * NIC will zero-pad the packet and also update the IP total length.
+        * Hence set this value to less than the minimum size of the
+        * MAC + IP + TCP headers (14 + 20 + 20 = 54 bytes); BGX will do
+        * the padding needed to transmit a 64 byte pkt.
+        */
+       if (size > 52)
+               size = 52;
 
        pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
        /* 81xx's RGX has only one LMAC */
index db9c632..edf779f 100644 (file)
 #define   NIC_QSET_SQ_0_7_DOOR                 (0x010838)
 #define   NIC_QSET_SQ_0_7_STATUS               (0x010840)
 #define   NIC_QSET_SQ_0_7_DEBUG                        (0x010848)
-#define   NIC_QSET_SQ_0_7_CNM_CHG              (0x010860)
 #define   NIC_QSET_SQ_0_7_STAT_0_1             (0x010900)
 
 #define   NIC_QSET_RBDR_0_1_CFG                        (0x010C00)
index d2d8ef2..ad4fddb 100644 (file)
@@ -382,7 +382,10 @@ static void nicvf_get_regs(struct net_device *dev,
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
-               p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q);
+               /* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which
+                * produces bus errors when read
+                */
+               p[i++] = 0;
                p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
                reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
                p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
index 06c014e..7d00162 100644 (file)
@@ -522,6 +522,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
        struct nicvf *nic = netdev_priv(netdev);
        struct snd_queue *sq;
        struct sq_hdr_subdesc *hdr;
+       struct sq_hdr_subdesc *tso_sqe;
 
        sq = &nic->qs->sq[cqe_tx->sq_idx];
 
@@ -536,17 +537,21 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
 
        nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
        skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
-       /* For TSO offloaded packets only one SQE will have a valid SKB */
        if (skb) {
+               /* Check for dummy descriptor used for HW TSO offload on 88xx */
+               if (hdr->dont_send) {
+                       /* Get actual TSO descriptors and free them */
+                       tso_sqe =
+                        (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+               }
                nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                prefetch(skb);
                napi_consume_skb(skb, budget);
                sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
        } else {
-               /* In case of HW TSO, HW sends a CQE for each segment of a TSO
-                * packet instead of a single CQE for the whole TSO packet
-                * transmitted. Each of this CQE points to the same SQE, so
-                * avoid freeing same SQE multiple times.
+               /* In case of SW TSO on 88xx, only the last segment will have
+                * an SKB attached, so just free SQEs here.
                 */
                if (!nic->hw_tso)
                        nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
@@ -1516,6 +1521,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct net_device *netdev;
        struct nicvf *nic;
        int    err, qcount;
+       u16    sdevid;
 
        err = pci_enable_device(pdev);
        if (err) {
@@ -1588,6 +1594,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (!pass1_silicon(nic->pdev))
                nic->hw_tso = true;
 
+       pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       if (sdevid == 0xA134)
+               nic->t88 = true;
+
        /* Check if this VF is in QS only mode */
        if (nic->sqs_mode)
                return 0;
index 7d90856..178c5c7 100644 (file)
@@ -953,6 +953,8 @@ static int nicvf_tso_count_subdescs(struct sk_buff *skb)
        return num_edescs + sh->gso_segs;
 }
 
+#define POST_CQE_DESC_COUNT 2
+
 /* Get the number of SQ descriptors needed to xmit this skb */
 static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
 {
@@ -963,6 +965,10 @@ static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
                return subdesc_cnt;
        }
 
+       /* Dummy descriptors to get TSO pkt completion notification */
+       if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
+               subdesc_cnt += POST_CQE_DESC_COUNT;
+
        if (skb_shinfo(skb)->nr_frags)
                subdesc_cnt += skb_shinfo(skb)->nr_frags;
 
@@ -980,14 +986,21 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
        struct sq_hdr_subdesc *hdr;
 
        hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
-       sq->skbuff[qentry] = (u64)skb;
-
        memset(hdr, 0, SND_QUEUE_DESC_SIZE);
        hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
-       /* Enable notification via CQE after processing SQE */
-       hdr->post_cqe = 1;
-       /* No of subdescriptors following this */
-       hdr->subdesc_cnt = subdesc_cnt;
+
+       if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
+               /* post_cqe = 0, to avoid HW posting a CQE for every TSO
+                * segment transmitted on 88xx.
+                */
+               hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
+       } else {
+               sq->skbuff[qentry] = (u64)skb;
+               /* Enable notification via CQE after processing SQE */
+               hdr->post_cqe = 1;
+               /* No of subdescriptors following this */
+               hdr->subdesc_cnt = subdesc_cnt;
+       }
        hdr->tot_len = len;
 
        /* Offload checksum calculation to HW */
@@ -1038,6 +1051,37 @@ static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
        gather->addr = data;
 }
 
+/* Add HDR + IMMEDIATE subdescriptors right after the descriptors of a TSO
+ * packet so that a CQE is posted as a notification that the TSO packet has
+ * been transmitted.
+ */
+static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
+                                           int tso_sqe, struct sk_buff *skb)
+{
+       struct sq_imm_subdesc *imm;
+       struct sq_hdr_subdesc *hdr;
+
+       sq->skbuff[qentry] = (u64)skb;
+
+       hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+       memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+       hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+       /* Enable notification via CQE after processing SQE */
+       hdr->post_cqe = 1;
+       /* There is no packet to transmit here */
+       hdr->dont_send = 1;
+       hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
+       hdr->tot_len = 1;
+       /* Actual TSO header SQE index, needed for cleanup */
+       hdr->rsvd2 = tso_sqe;
+
+       qentry = nicvf_get_nxt_sqentry(sq, qentry);
+       imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
+       memset(imm, 0, SND_QUEUE_DESC_SIZE);
+       imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
+       imm->len = 1;
+}
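How the pieces fit together on 88xx HW TSO (a worked example; the counts
are invented for illustration):

	/* A TSO skb needing 1 HDR + 4 gather subdescs reserves
	 * 5 + POST_CQE_DESC_COUNT = 7 SQ entries.  The first five carry
	 * the packet with post_cqe = 0, suppressing per-segment CQEs; the
	 * trailing HDR (dont_send = 1, rsvd2 = tso_sqe) plus IMMEDIATE
	 * pair posts the single completion.  nicvf_snd_pkt_handler() then
	 * uses hdr->rsvd2 to locate the real TSO descriptors and frees
	 * tso_sqe->subdesc_cnt + 1 = 5 entries, followed by
	 * hdr->subdesc_cnt + 1 = 2 for the dummy pair.
	 */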
+
 /* Segment a TSO packet into 'gso_size' segments and append
  * them to SQ for transfer
  */
@@ -1111,7 +1155,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
 int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
 {
        int i, size;
-       int subdesc_cnt;
+       int subdesc_cnt, tso_sqe = 0;
        int sq_num, qentry;
        struct queue_set *qs;
        struct snd_queue *sq;
@@ -1146,6 +1190,7 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
        /* Add SQ header subdesc */
        nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
                                 skb, skb->len);
+       tso_sqe = qentry;
 
        /* Add SQ gather subdescs */
        qentry = nicvf_get_nxt_sqentry(sq, qentry);
@@ -1169,6 +1214,11 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
        }
 
 doorbell:
+       if (nic->t88 && skb_shinfo(skb)->gso_size) {
+               qentry = nicvf_get_nxt_sqentry(sq, qentry);
+               nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+       }
+
        /* make sure all memory stores are done before ringing doorbell */
        smp_wmb();
 
index ace0ab9..2461296 100644 (file)
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
+cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
index bcfa512..4595569 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -53,6 +53,8 @@
 #include "cxgb4_uld.h"
 
 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
+extern struct list_head adapter_list;
+extern struct mutex uld_mutex;
 
 enum {
        MAX_NPORTS      = 4,     /* max # of ports */
@@ -338,12 +340,14 @@ struct adapter_params {
        enum chip_type chip;               /* chip code */
        struct arch_specific_params arch;  /* chip specific params */
        unsigned char offload;
+       unsigned char crypto;           /* HW capability for crypto */
 
        unsigned char bypass;
 
        unsigned int ofldq_wr_cred;
        bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
 
+       unsigned int nsched_cls;          /* number of traffic classes */
        unsigned int max_ordird_qp;       /* Max read depth per RDMA QP */
        unsigned int max_ird_adapter;     /* Max read depth per adapter */
 };
@@ -403,7 +407,6 @@ struct fw_info {
        struct fw_hdr fw_hdr;
 };
 
-
 struct trace_params {
        u32 data[TRACE_LEN / 4];
        u32 mask[TRACE_LEN / 4];
@@ -493,6 +496,7 @@ struct port_info {
 #endif /* CONFIG_CHELSIO_T4_FCOE */
        bool rxtstamp;  /* Enable TS */
        struct hwtstamp_config tstamp_config;
+       struct sched_table *sched_tbl;
 };
 
 struct dentry;
@@ -510,6 +514,10 @@ enum {                                 /* adapter flags */
        FW_OFLD_CONN       = (1 << 9),
 };
 
+enum {
+       ULP_CRYPTO_LOOKASIDE = 1 << 0,
+};
+
 struct rx_sw_desc;
 
 struct sge_fl {                     /* SGE free-buffer queue state */
@@ -680,6 +688,16 @@ struct sge_ctrl_txq {               /* state for an SGE control Tx queue */
        u8 full;                    /* the Tx ring is full */
 } ____cacheline_aligned_in_smp;
 
+struct sge_uld_rxq_info {
+       char name[IFNAMSIZ];    /* name of ULD driver */
+       struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
+       u16 *msix_tbl;          /* msix_tbl for uld */
+       u16 *rspq_id;           /* response queue id's of rxq */
+       u16 nrxq;               /* # of ingress uld queues */
+       u16 nciq;               /* # of completion queues */
+       u8 uld;                 /* uld type */
+};
+
 struct sge {
        struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
        struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
@@ -691,6 +709,7 @@ struct sge {
        struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
        struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
        struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
+       struct sge_uld_rxq_info **uld_rxq_info;
 
        struct sge_rspq intrq ____cacheline_aligned_in_smp;
        spinlock_t intrq_lock;
@@ -702,6 +721,7 @@ struct sge {
        u16 niscsitq;               /* # of available iSCST Rx queues */
        u16 rdmaqs;                 /* # of available RDMA Rx queues */
        u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
+       u16 nqs_per_uld;            /* # of Rx queues per ULD */
        u16 iscsi_rxq[MAX_OFLD_QSETS];
        u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
        u16 rdma_rxq[MAX_RDMA_QUEUES];
@@ -757,6 +777,22 @@ struct hash_mac_addr {
        u8 addr[ETH_ALEN];
 };
 
+struct uld_msix_bmap {
+       unsigned long *msix_bmap;
+       unsigned int mapsize;
+       spinlock_t lock; /* lock for acquiring bitmap */
+};
+
+struct uld_msix_info {
+       unsigned short vec;
+       char desc[IFNAMSIZ + 10];
+};
+
+struct vf_info {
+       unsigned char vf_mac_addr[ETH_ALEN];
+       bool pf_set_mac;
+};
+
 struct adapter {
        void __iomem *regs;
        void __iomem *bar2;
@@ -767,6 +803,7 @@ struct adapter {
        unsigned int mbox;
        unsigned int pf;
        unsigned int flags;
+       unsigned int adap_idx;
        enum chip_type chip;
 
        int msg_enable;
@@ -779,6 +816,9 @@ struct adapter {
                unsigned short vec;
                char desc[IFNAMSIZ + 10];
        } msix_info[MAX_INGQ + 1];
+       struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
+       struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
+       unsigned int msi_idx;
 
        struct doorbell_stats db_stats;
        struct sge sge;
@@ -786,6 +826,9 @@ struct adapter {
        struct net_device *port[MAX_NPORTS];
        u8 chan_map[NCHAN];                   /* channel -> port map */
 
+       struct vf_info *vfinfo;
+       u8 num_vfs;
+
        u32 filter_mode;
        unsigned int l2t_start;
        unsigned int l2t_end;
@@ -793,7 +836,9 @@ struct adapter {
        unsigned int clipt_start;
        unsigned int clipt_end;
        struct clip_tbl *clipt;
+       struct cxgb4_pci_uld_info *uld;
        void *uld_handle[CXGB4_ULD_MAX];
+       unsigned int num_uld;
        struct list_head list_node;
        struct list_head rcu_node;
        struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
@@ -824,6 +869,55 @@ struct adapter {
        spinlock_t win0_lock ____cacheline_aligned_in_smp;
 };
 
+/* Support for "sched-class" command to allow a TX Scheduling Class to be
+ * programmed with various parameters.
+ */
+struct ch_sched_params {
+       s8   type;                     /* packet or flow */
+       union {
+               struct {
+                       s8   level;    /* scheduler hierarchy level */
+                       s8   mode;     /* per-class or per-flow */
+                       s8   rateunit; /* bit or packet rate */
+                       s8   ratemode; /* %port relative or kbps absolute */
+                       s8   channel;  /* scheduler channel [0..N] */
+                       s8   class;    /* scheduler class [0..N] */
+                       s32  minrate;  /* minimum rate */
+                       s32  maxrate;  /* maximum rate */
+                       s16  weight;   /* percent weight */
+                       s16  pktsize;  /* average packet size */
+               } params;
+       } u;
+};
+
+enum {
+       SCHED_CLASS_TYPE_PACKET = 0,    /* class type */
+};
+
+enum {
+       SCHED_CLASS_LEVEL_CL_RL = 0,    /* class rate limiter */
+};
+
+enum {
+       SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
+};
+
+enum {
+       SCHED_CLASS_RATEUNIT_BITS = 0,  /* bit rate scheduling */
+};
+
+enum {
+       SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
+};
+
+/* Support for "sched_queue" command to allow one or more NIC TX Queues
+ * to be bound to a TX Scheduling Class.
+ */
+struct ch_sched_queue {
+       s8   queue;    /* queue index */
+       s8   class;    /* class index */
+};
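A sketch of how the structures and enums above combine to describe a
class rate limit (field values are illustrative):

	struct ch_sched_params p = {
		.type              = SCHED_CLASS_TYPE_PACKET,
		.u.params.level    = SCHED_CLASS_LEVEL_CL_RL,
		.u.params.mode     = SCHED_CLASS_MODE_CLASS,
		.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS,
		.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS,
		.u.params.channel  = 0,
		.u.params.class    = 0,
		.u.params.maxrate  = 100000,	/* 100 Mb/s, in Kb/s */
	};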
+
 /* Defined bit width of user definable filter tuples
  */
 #define ETHTYPE_BITWIDTH 16
@@ -952,6 +1046,11 @@ static inline int is_offload(const struct adapter *adap)
        return adap->params.offload;
 }
 
+static inline int is_pci_uld(const struct adapter *adap)
+{
+       return adap->params.crypto;
+}
+
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
        return readl(adap->regs + reg_addr);
@@ -1185,8 +1284,6 @@ int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
 int cxgb_busy_poll(struct napi_struct *napi);
-int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
-                              unsigned int cnt);
 void cxgb4_set_ethtool_ops(struct net_device *netdev);
 int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
 extern int dbfifo_int_thresh;
@@ -1289,6 +1386,18 @@ static inline int hash_mac_addr(const u8 *addr)
        return a & 0x3f;
 }
 
+int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
+                              unsigned int cnt);
+static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+                            unsigned int us, unsigned int cnt,
+                            unsigned int size, unsigned int iqe_size)
+{
+       q->adap = adap;
+       cxgb4_set_rspq_intr_params(q, us, cnt);
+       q->iqe_len = iqe_size;
+       q->size = size;
+}
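Typical use of the now-shared helper when sizing a response queue
(parameter values are illustrative only):

	/* 5 us holdoff timer, count threshold 1, 1024 entries, 64 B IQEs. */
	init_rspq(adap, &q->rspq, 5, 1, 1024, 64);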
+
 void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
                       unsigned int data_reg, const u32 *vals,
                       unsigned int nregs, unsigned int start_idx);
@@ -1514,6 +1623,9 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp,
                         int filter_index, int *enabled);
 int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
                         u32 addr, u32 val);
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+                   int rateunit, int ratemode, int channel, int class,
+                   int minrate, int maxrate, int weight, int pktsize);
 void t4_sge_decode_idma_state(struct adapter *adapter, int state);
 void t4_free_mem(void *addr);
 void t4_idma_monitor_init(struct adapter *adapter,
@@ -1523,5 +1635,7 @@ void t4_idma_monitor(struct adapter *adapter,
                     int hz, int ticks);
 int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
                      unsigned int naddr, u8 *addr);
-
+void uld_mem_free(struct adapter *adap);
+int uld_mem_alloc(struct adapter *adap);
+void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 #endif /* __CXGB4_H__ */
index 2bb804c..44cc976 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -76,6 +76,7 @@
 #include "cxgb4_debugfs.h"
 #include "clip_tbl.h"
 #include "l2t.h"
+#include "sched.h"
 
 char cxgb4_driver_name[] = KBUILD_MODNAME;
 
@@ -223,8 +224,8 @@ MODULE_PARM_DESC(select_queue,
 
 static struct dentry *cxgb4_debugfs_root;
 
-static LIST_HEAD(adapter_list);
-static DEFINE_MUTEX(uld_mutex);
+LIST_HEAD(adapter_list);
+DEFINE_MUTEX(uld_mutex);
 /* Adapter list to be accessed from atomic context */
 static LIST_HEAD(adap_rcu_list);
 static DEFINE_SPINLOCK(adap_rcu_lock);
@@ -1066,20 +1067,20 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
  */
 static int setup_sge_queues(struct adapter *adap)
 {
-       int err, msi_idx, i, j;
+       int err, i, j;
        struct sge *s = &adap->sge;
 
        bitmap_zero(s->starving_fl, s->egr_sz);
        bitmap_zero(s->txq_maperr, s->egr_sz);
 
        if (adap->flags & USING_MSIX)
-               msi_idx = 1;         /* vector 0 is for non-queue interrupts */
+               adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
        else {
                err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
                                       NULL, NULL, NULL, -1);
                if (err)
                        return err;
-               msi_idx = -((int)s->intrq.abs_id + 1);
+               adap->msi_idx = -((int)s->intrq.abs_id + 1);
        }
 
        /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
@@ -1096,7 +1097,7 @@ static int setup_sge_queues(struct adapter *adap)
         *    new/deleted queues.
         */
        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-                              msi_idx, NULL, fwevtq_handler, NULL, -1);
+                              adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
        if (err) {
 freeout:       t4_free_sge_resources(adap);
                return err;
@@ -1109,10 +1110,10 @@ freeout:        t4_free_sge_resources(adap);
                struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
 
                for (j = 0; j < pi->nqsets; j++, q++) {
-                       if (msi_idx > 0)
-                               msi_idx++;
+                       if (adap->msi_idx > 0)
+                               adap->msi_idx++;
                        err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
-                                              msi_idx, &q->fl,
+                                              adap->msi_idx, &q->fl,
                                               t4_ethrx_handler,
                                               NULL,
                                               t4_get_mps_bg_map(adap,
@@ -1141,11 +1142,11 @@ freeout:        t4_free_sge_resources(adap);
        }
 
 #define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
-       err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
+       err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \
        if (err) \
                goto freeout; \
-       if (msi_idx > 0) \
-               msi_idx += nq; \
+       if (adap->msi_idx > 0) \
+               adap->msi_idx += nq; \
 } while (0)
 
        ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
@@ -2565,6 +2566,12 @@ static void detach_ulds(struct adapter *adap)
                                             CXGB4_STATE_DETACH);
                        adap->uld_handle[i] = NULL;
                }
+       for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
+               if (adap->uld && adap->uld[i].handle) {
+                       adap->uld[i].state_change(adap->uld[i].handle,
+                                            CXGB4_STATE_DETACH);
+                       adap->uld[i].handle = NULL;
+               }
        if (netevent_registered && list_empty(&adapter_list)) {
                unregister_netevent_notifier(&cxgb4_netevent_nb);
                netevent_registered = false;
@@ -2584,6 +2591,10 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
        for (i = 0; i < CXGB4_ULD_MAX; i++)
                if (adap->uld_handle[i])
                        ulds[i].state_change(adap->uld_handle[i], new_state);
+       for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
+               if (adap->uld && adap->uld[i].handle)
+                       adap->uld[i].state_change(adap->uld[i].handle,
+                                                 new_state);
        mutex_unlock(&uld_mutex);
 }
 
@@ -2922,7 +2933,6 @@ EXPORT_SYMBOL(cxgb4_create_server_filter);
 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
                unsigned int queue, bool ipv6)
 {
-       int ret;
        struct filter_entry *f;
        struct adapter *adap;
 
@@ -2936,11 +2946,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
        /* Unlock the filter */
        f->locked = 0;
 
-       ret = delete_filter(adap, stid);
-       if (ret)
-               return ret;
-
-       return 0;
+       return delete_filter(adap, stid);
 }
 EXPORT_SYMBOL(cxgb4_remove_server_filter);
 
@@ -3079,10 +3085,53 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 #ifdef CONFIG_PCI_IOV
+static int dummy_open(struct net_device *dev)
+{
+       /* Turn carrier off since we don't have to transmit anything on this
+        * interface.
+        */
+       netif_carrier_off(dev);
+       return 0;
+}
+
+/* Fill MAC address that will be assigned by the FW */
+static void fill_vf_station_mac_addr(struct adapter *adap)
+{
+       unsigned int i;
+       u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
+       int err;
+       u8 *na;
+       u16 a, b;
+
+       err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
+       if (!err) {
+               na = adap->params.vpd.na;
+               for (i = 0; i < ETH_ALEN; i++)
+                       hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
+                                     hex2val(na[2 * i + 1]));
+               a = (hw_addr[0] << 8) | hw_addr[1];
+               b = (hw_addr[1] << 8) | hw_addr[2];
+               a ^= b;
+               a |= 0x0200;    /* locally assigned Ethernet MAC address */
+               a &= ~0x0100;   /* not a multicast Ethernet MAC address */
+               macaddr[0] = a >> 8;
+               macaddr[1] = a & 0xff;
+
+               for (i = 2; i < 5; i++)
+                       macaddr[i] = hw_addr[i + 1];
+
+               for (i = 0; i < adap->num_vfs; i++) {
+                       macaddr[5] = adap->pf * 16 + i;
+                       ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
+               }
+       }
+}
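+
+/* Worked example (hypothetical VPD serial, not driver code): with
+ * na = "0007430B3C11", hw_addr becomes 00:07:43:0b:3c:11.  Folding the
+ * first three octets gives a = 0x0007 ^ 0x0743 = 0x0744; OR-ing in the
+ * locally-assigned bit (0x0200) and clearing the multicast bit (0x0100)
+ * yields 0x0644.  VF 1 on PF 0 would therefore be assigned the station
+ * address 06:44:0b:3c:11:01, with the last octet encoding PF * 16 + VF.
+ */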
+
 static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adap = pi->adapter;
+       int ret;
 
        /* verify MAC addr is valid */
        if (!is_valid_ether_addr(mac)) {
@@ -3094,7 +3143,23 @@ static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
 
        dev_info(pi->adapter->pdev_dev,
                 "Setting MAC %pM on VF %d\n", mac, vf);
-       return t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+       ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+       if (!ret)
+               ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
+       return ret;
+}
+
+static int cxgb_get_vf_config(struct net_device *dev,
+                             int vf, struct ifla_vf_info *ivi)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+
+       if (vf >= adap->num_vfs)
+               return -EINVAL;
+       ivi->vf = vf;
+       ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+       return 0;
 }
 #endif
 
@@ -3134,6 +3199,87 @@ static void cxgb_netpoll(struct net_device *dev)
 }
 #endif
 
+static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+       struct port_info *pi = netdev_priv(dev);
+       struct adapter *adap = pi->adapter;
+       struct sched_class *e;
+       struct ch_sched_params p;
+       struct ch_sched_queue qe;
+       u32 req_rate;
+       int err = 0;
+
+       if (!can_sched(dev))
+               return -ENOTSUPP;
+
+       if (index < 0 || index > pi->nqsets - 1)
+               return -EINVAL;
+
+       if (!(adap->flags & FULL_INIT_DONE)) {
+               dev_err(adap->pdev_dev,
+                       "Failed to rate limit on queue %d. Link Down?\n",
+                       index);
+               return -EINVAL;
+       }
+
+       /* Convert from Mbps to Kbps; << 10 approximates * 1000 */
+       req_rate = rate << 10;
+
+       /* Max rate is 10 Gbps */
+       if (req_rate >= SCHED_MAX_RATE_KBPS) {
+               dev_err(adap->pdev_dev,
+                       "Invalid rate %u Mbps, Max rate is %u Gbps\n",
+                       rate, SCHED_MAX_RATE_KBPS);
+               return -ERANGE;
+       }
+
+       /* First unbind the queue from any existing class */
+       memset(&qe, 0, sizeof(qe));
+       qe.queue = index;
+       qe.class = SCHED_CLS_NONE;
+
+       err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
+       if (err) {
+               dev_err(adap->pdev_dev,
+                       "Unbinding Queue %d on port %d fail. Err: %d\n",
+                       index, pi->port_id, err);
+               return err;
+       }
+
+       /* Queue already unbound */
+       if (!req_rate)
+               return 0;
+
+       /* Fetch any available unused or matching scheduling class */
+       memset(&p, 0, sizeof(p));
+       p.type = SCHED_CLASS_TYPE_PACKET;
+       p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
+       p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
+       p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
+       p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
+       p.u.params.channel  = pi->tx_chan;
+       p.u.params.class    = SCHED_CLS_NONE;
+       p.u.params.minrate  = 0;
+       p.u.params.maxrate  = req_rate;
+       p.u.params.weight   = 0;
+       p.u.params.pktsize  = dev->mtu;
+
+       e = cxgb4_sched_class_alloc(dev, &p);
+       if (!e)
+               return -ENOMEM;
+
+       /* Bind the queue to a scheduling class */
+       memset(&qe, 0, sizeof(qe));
+       qe.queue = index;
+       qe.class = e->idx;
+
+       err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
+       if (err)
+               dev_err(adap->pdev_dev,
+                       "Queue rate limiting failed. Err: %d\n", err);
+       return err;
+}
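+
+/* Usage sketch (illustrative; the interface name is hypothetical): the
+ * stack calls .ndo_set_tx_maxrate when userspace writes the per-queue
+ * sysfs attribute, e.g.
+ *
+ *     echo 1000 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+ *
+ * which would invoke cxgb_set_tx_maxrate(dev, 0, 1000) and rate limit
+ * TX queue 0 to roughly 1 Gbps via a freshly bound scheduling class.
+ */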
+
 static const struct net_device_ops cxgb4_netdev_ops = {
        .ndo_open             = cxgb_open,
        .ndo_stop             = cxgb_close,
@@ -3156,13 +3302,16 @@ static const struct net_device_ops cxgb4_netdev_ops = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
        .ndo_busy_poll        = cxgb_busy_poll,
 #endif
+       .ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
 };
 
-static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
 #ifdef CONFIG_PCI_IOV
+static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+       .ndo_open             = dummy_open,
        .ndo_set_vf_mac       = cxgb_set_vf_mac,
-#endif
+       .ndo_get_vf_config    = cxgb_get_vf_config,
 };
+#endif
 
 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
@@ -4019,6 +4168,12 @@ static int adap_init0(struct adapter *adap)
        adap->clipt_start = val[0];
        adap->clipt_end = val[1];
 
+       /* We don't yet have a PARAMs call to retrieve the number of Traffic
+        * Classes supported by the hardware/firmware, so we hard-code it here
+        * for now.
+        */
+       adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+
        /* query params related to active filter region */
        params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
        params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
@@ -4170,6 +4325,11 @@ static int adap_init0(struct adapter *adap)
                adap->vres.iscsi.start = val[0];
                adap->vres.iscsi.size = val[1] - val[0] + 1;
        }
+       if (caps_cmd.cryptocaps) {
+               /* Should query params here...TODO */
+               adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+               adap->num_uld += 1;
+       }
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
 
@@ -4351,16 +4511,6 @@ static inline bool is_x_10g_port(const struct link_config *lc)
               (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
 }
 
-static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
-                            unsigned int us, unsigned int cnt,
-                            unsigned int size, unsigned int iqe_size)
-{
-       q->adap = adap;
-       cxgb4_set_rspq_intr_params(q, us, cnt);
-       q->iqe_len = iqe_size;
-       q->size = size;
-}
-
 /*
  * Perform default configuration of DMA queues depending on the number and type
  * of ports we found and the number of available CPUs.  Most settings can be
@@ -4375,6 +4525,15 @@ static void cfg_queues(struct adapter *adap)
 #endif
        int ciq_size;
 
+       /* Reduce memory usage in kdump environment by disabling all offload.
+        */
+       if (is_kdump_kernel()) {
+               adap->params.offload = 0;
+               adap->params.crypto = 0;
+       } else if (adap->num_uld && uld_mem_alloc(adap)) {
+               adap->params.crypto = 0;
+       }
+
        for_each_port(adap, i)
                n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 #ifdef CONFIG_CHELSIO_T4_DCB
@@ -4405,11 +4564,6 @@ static void cfg_queues(struct adapter *adap)
        if (q10g > netif_get_num_default_rss_queues())
                q10g = netif_get_num_default_rss_queues();
 
-       /* Reduce memory usage in kdump environment, disable all offload.
-        */
-       if (is_kdump_kernel())
-               adap->params.offload = 0;
-
        for_each_port(adap, i) {
                struct port_info *pi = adap2pinfo(adap, i);
 
@@ -4538,23 +4692,58 @@ static void reduce_ethqs(struct adapter *adap, int n)
        }
 }
 
+static int get_msix_info(struct adapter *adap)
+{
+       struct uld_msix_info *msix_info;
+       int max_ingq = (MAX_OFLD_QSETS * adap->num_uld);
+
+       msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+       if (!msix_info)
+               return -ENOMEM;
+
+       adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
+                                                sizeof(long), GFP_KERNEL);
+       if (!adap->msix_bmap_ulds.msix_bmap) {
+               kfree(msix_info);
+               return -ENOMEM;
+       }
+       spin_lock_init(&adap->msix_bmap_ulds.lock);
+       adap->msix_info_ulds = msix_info;
+       return 0;
+}
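+
+/* Sizing note: each ULD can claim at most MAX_OFLD_QSETS ingress queues,
+ * so both the per-vector info array and the allocation bitmap are sized
+ * for MAX_OFLD_QSETS * num_uld entries; BITS_TO_LONGS() rounds the bitmap
+ * up to whole longs as required by the bitops API.
+ */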
+
+static void free_msix_info(struct adapter *adap)
+{
+       if (!adap->num_uld)
+               return;
+
+       kfree(adap->msix_info_ulds);
+       kfree(adap->msix_bmap_ulds.msix_bmap);
+}
+
 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
 #define EXTRA_VECS 2
 
 static int enable_msix(struct adapter *adap)
 {
-       int ofld_need = 0;
-       int i, want, need, allocated;
+       int ofld_need = 0, uld_need = 0;
+       int i, j, want, need, allocated;
        struct sge *s = &adap->sge;
        unsigned int nchan = adap->params.nports;
        struct msix_entry *entries;
+       int max_ingq = MAX_INGQ;
 
-       entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
+       max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+       entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
                          GFP_KERNEL);
        if (!entries)
                return -ENOMEM;
 
-       for (i = 0; i < MAX_INGQ + 1; ++i)
+       /* allocate per-vector info and the allocation bitmap for the ULDs */
+       if (is_pci_uld(adap) && get_msix_info(adap))
+               adap->params.crypto = 0;
+
+       for (i = 0; i < max_ingq + 1; ++i)
                entries[i].entry = i;
 
        want = s->max_ethqsets + EXTRA_VECS;
@@ -4567,13 +4756,17 @@ static int enable_msix(struct adapter *adap)
                else
                        ofld_need = 4 * nchan;
        }
+       if (is_pci_uld(adap)) {
+               want += netif_get_num_default_rss_queues() * nchan;
+               uld_need = nchan;
+       }
 #ifdef CONFIG_CHELSIO_T4_DCB
        /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
         * each port.
         */
-       need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
+       need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
 #else
-       need = adap->params.nports + EXTRA_VECS + ofld_need;
+       need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
 #endif
        allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
        if (allocated < 0) {
@@ -4587,12 +4780,20 @@ static int enable_msix(struct adapter *adap)
         * Every group gets its minimum requirement and NIC gets top
         * priority for leftovers.
         */
-       i = allocated - EXTRA_VECS - ofld_need;
+       i = allocated - EXTRA_VECS - ofld_need - uld_need;
        if (i < s->max_ethqsets) {
                s->max_ethqsets = i;
                if (i < s->ethqsets)
                        reduce_ethqs(adap, i);
        }
+       if (is_pci_uld(adap)) {
+               if (allocated < want)
+                       s->nqs_per_uld = nchan;
+               else
+                       s->nqs_per_uld = netif_get_num_default_rss_queues() *
+                                       nchan;
+       }
+
        if (is_offload(adap)) {
                if (allocated < want) {
                        s->rdmaqs = nchan;
@@ -4604,16 +4805,24 @@ static int enable_msix(struct adapter *adap)
 
                /* leftovers go to OFLD */
                i = allocated - EXTRA_VECS - s->max_ethqsets -
-                   s->rdmaqs - s->rdmaciqs - s->niscsitq;
+                       s->rdmaqs - s->rdmaciqs - s->niscsitq;
+               if (is_pci_uld(adap))
+                       i -= s->nqs_per_uld * adap->num_uld;
                s->iscsiqsets = (i / nchan) * nchan;  /* round down */
 
        }
-       for (i = 0; i < allocated; ++i)
+
+       for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i)
                adap->msix_info[i].vec = entries[i].vector;
+       if (is_pci_uld(adap)) {
+               for (j = 0; i < allocated; ++i, j++)
+                       adap->msix_info_ulds[j].vec = entries[i].vector;
+               adap->msix_bmap_ulds.mapsize = j;
+       }
        dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
-                "nic %d iscsi %d rdma cpl %d rdma ciq %d\n",
+                "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n",
                 allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
-                s->rdmaciqs);
+                s->rdmaciqs, s->nqs_per_uld);
 
        kfree(entries);
        return 0;
@@ -4823,6 +5032,7 @@ static void free_some_resources(struct adapter *adapter)
        unsigned int i;
 
        t4_free_mem(adapter->l2t);
+       t4_cleanup_sched(adapter);
        t4_free_mem(adapter->tids.tid_tab);
        kfree(adapter->sge.egr_map);
        kfree(adapter->sge.ingr_map);
@@ -4874,6 +5084,51 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev)
 }
 
 #ifdef CONFIG_PCI_IOV
+static void dummy_setup(struct net_device *dev)
+{
+       dev->type = ARPHRD_NONE;
+       dev->mtu = 0;
+       dev->hard_header_len = 0;
+       dev->addr_len = 0;
+       dev->tx_queue_len = 0;
+       dev->flags |= IFF_NOARP;
+       dev->priv_flags |= IFF_NO_QUEUE;
+
+       /* Initialize the device structure. */
+       dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+       dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+       dev->destructor = free_netdev;
+}
+
+static int config_mgmt_dev(struct pci_dev *pdev)
+{
+       struct adapter *adap = pci_get_drvdata(pdev);
+       struct net_device *netdev;
+       struct port_info *pi;
+       char name[IFNAMSIZ];
+       int err;
+
+       snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf);
+       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup);
+       if (!netdev)
+               return -ENOMEM;
+
+       pi = netdev_priv(netdev);
+       pi->adapter = adap;
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+
+       adap->port[0] = netdev;
+
+       err = register_netdev(adap->port[0]);
+       if (err) {
+               pr_info("Unable to register VF mgmt netdev %s\n", name);
+               free_netdev(adap->port[0]);
+               adap->port[0] = NULL;
+               return err;
+       }
+       return 0;
+}
+
 static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
        struct adapter *adap = pci_get_drvdata(pdev);
@@ -4908,8 +5163,14 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
         */
        if (!num_vfs) {
                pci_disable_sriov(pdev);
-               if (adap->port[0]->reg_state == NETREG_REGISTERED)
+               if (adap->port[0]) {
                        unregister_netdev(adap->port[0]);
+                       adap->port[0] = NULL;
+               }
+               /* free VF resources */
+               kfree(adap->vfinfo);
+               adap->vfinfo = NULL;
+               adap->num_vfs = 0;
                return num_vfs;
        }
 
@@ -4918,12 +5179,16 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
                if (err)
                        return err;
 
-               if (adap->port[0]->reg_state == NETREG_UNINITIALIZED) {
-                       err = register_netdev(adap->port[0]);
-                       if (err < 0)
-                               pr_info("Unable to register VF mgmt netdev\n");
-               }
+               adap->num_vfs = num_vfs;
+               err = config_mgmt_dev(pdev);
+               if (err)
+                       return err;
        }
+
+       adap->vfinfo = kcalloc(adap->num_vfs,
+                              sizeof(struct vf_info), GFP_KERNEL);
+       if (adap->vfinfo)
+               fill_vf_station_mac_addr(adap);
        return num_vfs;
 }
 #endif
@@ -4935,9 +5200,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        bool highdma = false;
        struct adapter *adapter = NULL;
        struct net_device *netdev;
-#ifdef CONFIG_PCI_IOV
-       char name[IFNAMSIZ];
-#endif
        void __iomem *regs;
        u32 whoami, pl_rev;
        enum chip_type chip;
@@ -5190,6 +5452,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                }
        }
 #endif
+
+       for_each_port(adapter, i) {
+               pi = adap2pinfo(adapter, i);
+               pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
+               if (!pi->sched_tbl)
+                       dev_warn(&pdev->dev,
+                                "could not activate scheduling on port %d\n",
+                                i);
+       }
+
        if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
                dev_warn(&pdev->dev, "could not allocate TID table, "
                         "continuing\n");
@@ -5215,8 +5487,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* See what interrupts we'll be using */
        if (msi > 1 && enable_msix(adapter) == 0)
                adapter->flags |= USING_MSIX;
-       else if (msi > 0 && pci_enable_msi(pdev) == 0)
+       else if (msi > 0 && pci_enable_msi(pdev) == 0) {
                adapter->flags |= USING_MSI;
+               if (msi > 1)
+                       free_msix_info(adapter);
+       }
 
        /* check for PCI Express bandwidth capabilities */
        cxgb4_check_pcie_caps(adapter);
@@ -5285,40 +5560,24 @@ sriov:
                goto free_pci_region;
        }
 
-       snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap_idx, func);
-       netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, ether_setup);
-       if (!netdev) {
-               err = -ENOMEM;
-               goto free_adapter;
-       }
-
        adapter->pdev = pdev;
        adapter->pdev_dev = &pdev->dev;
        adapter->name = pci_name(pdev);
        adapter->mbox = func;
        adapter->pf = func;
        adapter->regs = regs;
+       adapter->adap_idx = adap_idx;
        adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
                                    (sizeof(struct mbox_cmd) *
                                     T4_OS_LOG_MBOX_CMDS),
                                    GFP_KERNEL);
        if (!adapter->mbox_log) {
                err = -ENOMEM;
-               goto free_netdevice;
+               goto free_adapter;
        }
-       pi = netdev_priv(netdev);
-       pi->adapter = adapter;
-       SET_NETDEV_DEV(netdev, &pdev->dev);
        pci_set_drvdata(pdev, adapter);
-
-       adapter->port[0] = netdev;
-       netdev->netdev_ops = &cxgb4_mgmt_netdev_ops;
-       netdev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
-
        return 0;
 
- free_netdevice:
-       free_netdev(adapter->port[0]);
  free_adapter:
        kfree(adapter);
  free_pci_region:
@@ -5332,6 +5591,10 @@ sriov:
 
  out_free_dev:
        free_some_resources(adapter);
+       if (adapter->flags & USING_MSIX)
+               free_msix_info(adapter);
+       if (adapter->num_uld)
+               uld_mem_free(adapter);
  out_unmap_bar:
        if (!is_t4(adapter->params.chip))
                iounmap(adapter->bar2);
@@ -5393,6 +5656,10 @@ static void remove_one(struct pci_dev *pdev)
                if (adapter->flags & FULL_INIT_DONE)
                        cxgb_down(adapter);
 
+               if (adapter->flags & USING_MSIX)
+                       free_msix_info(adapter);
+               if (adapter->num_uld)
+                       uld_mem_free(adapter);
                free_some_resources(adapter);
 #if IS_ENABLED(CONFIG_IPV6)
                t4_cleanup_clip_tbl(adapter);
@@ -5412,10 +5679,10 @@ static void remove_one(struct pci_dev *pdev)
        }
 #ifdef CONFIG_PCI_IOV
        else {
-               if (adapter->port[0]->reg_state == NETREG_REGISTERED)
+               if (adapter->port[0])
                        unregister_netdev(adapter->port[0]);
-               free_netdev(adapter->port[0]);
                iounmap(adapter->regs);
+               kfree(adapter->vfinfo);
                kfree(adapter);
                pci_disable_sriov(pdev);
                pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
new file mode 100644 (file)
index 0000000..5d402ba
--- /dev/null
@@ -0,0 +1,554 @@
+/*
+ * cxgb4_uld.c: Chelsio Upper Layer Driver Interface for T4/T5/T6 SGE management
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *  Written by: Atul Gupta (atul.gupta@chelsio.com)
+ *  Written by: Hariprasad Shenai (hariprasad@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/pci.h>
+
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+#include "t4_regs.h"
+#include "t4fw_api.h"
+#include "t4_msg.h"
+
+#define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++)
+
+static int get_msix_idx_from_bmap(struct adapter *adap)
+{
+       struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
+       unsigned long flags;
+       unsigned int msix_idx;
+
+       spin_lock_irqsave(&bmap->lock, flags);
+       msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
+       if (msix_idx < bmap->mapsize) {
+               __set_bit(msix_idx, bmap->msix_bmap);
+       } else {
+               spin_unlock_irqrestore(&bmap->lock, flags);
+               return -ENOSPC;
+       }
+
+       spin_unlock_irqrestore(&bmap->lock, flags);
+       return msix_idx;
+}
+
+static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
+{
+       struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
+       unsigned long flags;
+
+       spin_lock_irqsave(&bmap->lock, flags);
+       __clear_bit(msix_idx, bmap->msix_bmap);
+       spin_unlock_irqrestore(&bmap->lock, flags);
+}
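+
+/* The two helpers above implement a minimal index allocator: a
+ * spinlock-protected bitmap in which find_first_zero_bit() picks the
+ * lowest free MSI-X slot and __set_bit()/__clear_bit() mark it busy or
+ * free.  The non-atomic bitop variants are safe here because every
+ * access happens under bmap->lock.
+ */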
+
+static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
+                        const struct pkt_gl *gl)
+{
+       struct adapter *adap = q->adap;
+       struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
+       int ret;
+
+       /* FW can send CPLs encapsulated in a CPL_FW4_MSG */
+       if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
+           ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
+               rsp += 2;
+
+       if (q->flush_handler)
+               ret = adap->uld[q->uld].lro_rx_handler(adap->uld[q->uld].handle,
+                               rsp, gl, &q->lro_mgr,
+                               &q->napi);
+       else
+               ret = adap->uld[q->uld].rx_handler(adap->uld[q->uld].handle,
+                               rsp, gl);
+
+       if (ret) {
+               rxq->stats.nomem++;
+               return -1;
+       }
+
+       if (!gl)
+               rxq->stats.imm++;
+       else if (gl == CXGB4_MSG_AN)
+               rxq->stats.an++;
+       else
+               rxq->stats.pkts++;
+       return 0;
+}
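+
+/* Note on the CPL_FW4_MSG unwrap above: rsp points at __be64 words, so
+ * "rsp += 2" skips 16 bytes -- the outer RSS header plus the leading
+ * words of the encapsulating CPL_FW4_MSG -- leaving rsp at the inner
+ * message (FW_TYPE_RSSCPL) that the ULD's rx_handler expects.
+ */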
+
+static int alloc_uld_rxqs(struct adapter *adap,
+                         struct sge_uld_rxq_info *rxq_info,
+                         unsigned int nq, unsigned int offset, bool lro)
+{
+       struct sge *s = &adap->sge;
+       struct sge_ofld_rxq *q = rxq_info->uldrxq + offset;
+       unsigned short *ids = rxq_info->rspq_id + offset;
+       unsigned int per_chan = nq / adap->params.nports;
+       unsigned int msi_idx, bmap_idx;
+       int i, err;
+
+       if (adap->flags & USING_MSIX)
+               msi_idx = 1;
+       else
+               msi_idx = -((int)s->intrq.abs_id + 1);
+
+       for (i = 0; i < nq; i++, q++) {
+               if (msi_idx >= 0) {
+                       bmap_idx = get_msix_idx_from_bmap(adap);
+                       adap->msi_idx++;
+               }
+               err = t4_sge_alloc_rxq(adap, &q->rspq, false,
+                                      adap->port[i / per_chan],
+                                      adap->msi_idx,
+                                      q->fl.size ? &q->fl : NULL,
+                                      uldrx_handler,
+                                      NULL,
+                                      0);
+               if (err)
+                       goto freeout;
+               if (msi_idx >= 0)
+                       rxq_info->msix_tbl[i + offset] = bmap_idx;
+               memset(&q->stats, 0, sizeof(q->stats));
+               if (ids)
+                       ids[i] = q->rspq.abs_id;
+       }
+       return 0;
+freeout:
+       q = rxq_info->uldrxq + offset;
+       for ( ; i; i--, q++) {
+               if (q->rspq.desc)
+                       free_rspq_fl(adap, &q->rspq,
+                                    q->fl.size ? &q->fl : NULL);
+               adap->msi_idx--;
+       }
+
+       /* We need to free the rxqs from the earlier pass as well in case of
+        * ciq allocation failure; i is 0 at this point, so restart from the
+        * head of the table and unwind the first offset entries.
+        */
+       if (offset) {
+               q = rxq_info->uldrxq;
+               for (i = offset; i; i--, q++) {
+                       if (q->rspq.desc)
+                               free_rspq_fl(adap, &q->rspq,
+                                            q->fl.size ? &q->fl : NULL);
+                       adap->msi_idx--;
+               }
+       }
+       return err;
+}
+
+int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+       if (adap->flags & USING_MSIX) {
+               rxq_info->msix_tbl = kcalloc(rxq_info->nrxq + rxq_info->nciq,
+                                            sizeof(*rxq_info->msix_tbl),
+                                            GFP_KERNEL);
+               if (!rxq_info->msix_tbl)
+                       return -ENOMEM;
+       }
+
+       return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
+                !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq,
+                                rxq_info->nrxq, lro));
+}
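+
+/* The return expression above collapses the two allocation passes into
+ * a boolean: 0 when both the rxq and ciq ranges were set up, 1 (not an
+ * errno) as soon as either failed.  Note that the ciq pass is skipped
+ * entirely, via short-circuit evaluation, if the rxq pass fails.
+ */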
+
+static void t4_free_uld_rxqs(struct adapter *adap, int n,
+                            struct sge_ofld_rxq *q)
+{
+       for ( ; n; n--, q++) {
+               if (q->rspq.desc)
+                       free_rspq_fl(adap, &q->rspq,
+                                    q->fl.size ? &q->fl : NULL);
+               adap->msi_idx--;
+       }
+}
+
+void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+       if (rxq_info->nciq)
+               t4_free_uld_rxqs(adap, rxq_info->nciq,
+                                rxq_info->uldrxq + rxq_info->nrxq);
+       t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq);
+       if (adap->flags & USING_MSIX)
+               kfree(rxq_info->msix_tbl);
+}
+
+int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
+                  const struct cxgb4_pci_uld_info *uld_info)
+{
+       struct sge *s = &adap->sge;
+       struct sge_uld_rxq_info *rxq_info;
+       int i, nrxq;
+
+       rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL);
+       if (!rxq_info)
+               return -ENOMEM;
+
+       if (uld_info->nrxq > s->nqs_per_uld)
+               rxq_info->nrxq = s->nqs_per_uld;
+       else
+               rxq_info->nrxq = uld_info->nrxq;
+       if (!uld_info->nciq)
+               rxq_info->nciq = 0;
+       else if (uld_info->nciq > s->nqs_per_uld)
+               rxq_info->nciq = s->nqs_per_uld;
+       else
+               rxq_info->nciq = uld_info->nciq;
+
+       nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */
+       rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq),
+                                  GFP_KERNEL);
+       if (!rxq_info->uldrxq) {
+               kfree(rxq_info);
+               return -ENOMEM;
+       }
+
+       rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL);
+       if (!rxq_info->rspq_id) {
+               kfree(rxq_info->uldrxq);
+               kfree(rxq_info);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < rxq_info->nrxq; i++) {
+               struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
+
+               init_rspq(adap, &r->rspq, 5, 1, uld_info->rxq_size, 64);
+               r->rspq.uld = uld_type;
+               r->fl.size = 72;
+       }
+
+       for (i = rxq_info->nrxq; i < nrxq; i++) {
+               struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
+
+               init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64);
+               r->rspq.uld = uld_type;
+               r->fl.size = 72;
+       }
+
+       strlcpy(rxq_info->name, uld_info->name, IFNAMSIZ);
+       adap->sge.uld_rxq_info[uld_type] = rxq_info;
+
+       return 0;
+}
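+
+/* cfg_queues_uld() clamps the ULD's requested rxq/ciq counts to the
+ * nqs_per_uld budget computed in enable_msix(), so a ULD can never claim
+ * more ingress queues than the MSI-X planning set aside for it.
+ */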
+
+void free_queues_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+       kfree(rxq_info->rspq_id);
+       kfree(rxq_info->uldrxq);
+       kfree(rxq_info);
+}
+
+int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+       int idx, bmap_idx, err = 0;
+
+       for_each_uldrxq(rxq_info, idx) {
+               bmap_idx = rxq_info->msix_tbl[idx];
+               err = request_irq(adap->msix_info_ulds[bmap_idx].vec,
+                                 t4_sge_intr_msix, 0,
+                                 adap->msix_info_ulds[bmap_idx].desc,
+                                 &rxq_info->uldrxq[idx].rspq);
+               if (err)
+                       goto unwind;
+       }
+       return 0;
+unwind:
+       while (--idx >= 0) {
+               bmap_idx = rxq_info->msix_tbl[idx];
+               free_msix_idx_in_bmap(adap, bmap_idx);
+               free_irq(adap->msix_info_ulds[bmap_idx].vec,
+                        &rxq_info->uldrxq[idx].rspq);
+       }
+       return err;
+}
+
+void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+       int idx;
+
+       for_each_uldrxq(rxq_info, idx) {
+               unsigned int bmap_idx = rxq_info->msix_tbl[idx];
+
+               free_msix_idx_in_bmap(adap, bmap_idx);
+               free_irq(adap->msix_info_ulds[bmap_idx].vec,
+                        &rxq_info->uldrxq[idx].rspq);
+       }
+}
+
+void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+       int n = sizeof(adap->msix_info_ulds[0].desc);
+       int idx;
+
+       for_each_uldrxq(rxq_info, idx) {
+               unsigned int bmap_idx = rxq_info->msix_tbl[idx];
+
+               snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
+                        adap->port[0]->name, rxq_info->name, idx);
+       }
+}
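+
+/* Example (hypothetical names): with port 0 named "eth0" and a ULD
+ * registered as "crypto", the vectors above would show up as
+ * "eth0-crypto0", "eth0-crypto1", ... in /proc/interrupts.
+ */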
+
+static void enable_rx(struct adapter *adap, struct sge_rspq *q)
+{
+       if (!q)
+               return;
+
+       if (q->handler) {
+               cxgb_busy_poll_init_lock(q);
+               napi_enable(&q->napi);
+       }
+       /* 0-increment GTS to start the timer and enable interrupts */
+       t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
+                    SEINTARM_V(q->intr_params) |
+                    INGRESSQID_V(q->cntxt_id));
+}
+
+static void quiesce_rx(struct adapter *adap, struct sge_rspq *q)
+{
+       if (q && q->handler) {
+               napi_disable(&q->napi);
+               local_bh_disable();
+               while (!cxgb_poll_lock_napi(q))
+                       mdelay(1);
+               local_bh_enable();
+       }
+}
+
+void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+       int idx;
+
+       for_each_uldrxq(rxq_info, idx)
+               enable_rx(adap, &rxq_info->uldrxq[idx].rspq);
+}
+
+void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+       int idx;
+
+       for_each_uldrxq(rxq_info, idx)
+               quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
+}
+
+static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
+                          struct cxgb4_lld_info *lli)
+{
+       struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+       lli->rxq_ids = rxq_info->rspq_id;
+       lli->nrxq = rxq_info->nrxq;
+       lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq;
+       lli->nciq = rxq_info->nciq;
+}
+
+int uld_mem_alloc(struct adapter *adap)
+{
+       struct sge *s = &adap->sge;
+
+       adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL);
+       if (!adap->uld)
+               return -ENOMEM;
+
+       s->uld_rxq_info = kzalloc(adap->num_uld *
+                                 sizeof(struct sge_uld_rxq_info *),
+                                 GFP_KERNEL);
+       if (!s->uld_rxq_info)
+               goto err_uld;
+
+       return 0;
+err_uld:
+       kfree(adap->uld);
+       return -ENOMEM;
+}
+
+void uld_mem_free(struct adapter *adap)
+{
+       struct sge *s = &adap->sge;
+
+       kfree(s->uld_rxq_info);
+       kfree(adap->uld);
+}
+
+static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
+{
+       int i;
+
+       lld->pdev = adap->pdev;
+       lld->pf = adap->pf;
+       lld->l2t = adap->l2t;
+       lld->tids = &adap->tids;
+       lld->ports = adap->port;
+       lld->vr = &adap->vres;
+       lld->mtus = adap->params.mtus;
+       lld->ntxq = adap->sge.iscsiqsets;
+       lld->nchan = adap->params.nports;
+       lld->nports = adap->params.nports;
+       lld->wr_cred = adap->params.ofldq_wr_cred;
+       lld->adapter_type = adap->params.chip;
+       lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
+       lld->udb_density = 1 << adap->params.sge.eq_qpp;
+       lld->ucq_density = 1 << adap->params.sge.iq_qpp;
+       lld->filt_mode = adap->params.tp.vlan_pri_map;
+       /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
+       for (i = 0; i < NCHAN; i++)
+               lld->tx_modq[i] = i;
+       lld->gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
+       lld->db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
+       lld->fw_vers = adap->params.fw_vers;
+       lld->dbfifo_int_thresh = dbfifo_int_thresh;
+       lld->sge_ingpadboundary = adap->sge.fl_align;
+       lld->sge_egrstatuspagesize = adap->sge.stat_len;
+       lld->sge_pktshift = adap->sge.pktshift;
+       lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
+       lld->max_ordird_qp = adap->params.max_ordird_qp;
+       lld->max_ird_adapter = adap->params.max_ird_adapter;
+       lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
+       lld->nodeid = dev_to_node(adap->pdev_dev);
+}
+
+static void uld_attach(struct adapter *adap, unsigned int uld)
+{
+       void *handle;
+       struct cxgb4_lld_info lli;
+
+       uld_init(adap, &lli);
+       uld_queue_init(adap, uld, &lli);
+
+       handle = adap->uld[uld].add(&lli);
+       if (IS_ERR(handle)) {
+               dev_warn(adap->pdev_dev,
+                        "could not attach to the %s driver, error %ld\n",
+                        adap->uld[uld].name, PTR_ERR(handle));
+               return;
+       }
+
+       adap->uld[uld].handle = handle;
+
+       if (adap->flags & FULL_INIT_DONE)
+               adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
+}
+
+int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
+                          struct cxgb4_pci_uld_info *p)
+{
+       int ret = 0;
+       struct adapter *adap;
+
+       if (type >= CXGB4_PCI_ULD_MAX)
+               return -EINVAL;
+
+       mutex_lock(&uld_mutex);
+       list_for_each_entry(adap, &adapter_list, list_node) {
+               if (!is_pci_uld(adap))
+                       continue;
+               ret = cfg_queues_uld(adap, type, p);
+               if (ret)
+                       goto out;
+               ret = setup_sge_queues_uld(adap, type, p->lro);
+               if (ret)
+                       goto free_queues;
+               if (adap->flags & USING_MSIX) {
+                       name_msix_vecs_uld(adap, type);
+                       ret = request_msix_queue_irqs_uld(adap, type);
+                       if (ret)
+                               goto free_rxq;
+               }
+               if (adap->flags & FULL_INIT_DONE)
+                       enable_rx_uld(adap, type);
+               if (adap->uld[type].add) {
+                       ret = -EBUSY;
+                       goto free_irq;
+               }
+               adap->uld[type] = *p;
+               uld_attach(adap, type);
+       }
+       mutex_unlock(&uld_mutex);
+       return 0;
+
+free_irq:
+       if (adap->flags & USING_MSIX)
+               free_msix_queue_irqs_uld(adap, type);
+free_rxq:
+       free_sge_queues_uld(adap, type);
+free_queues:
+       free_queues_uld(adap, type);
+out:
+       mutex_unlock(&uld_mutex);
+       return ret;
+}
+EXPORT_SYMBOL(cxgb4_register_pci_uld);
+
+int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type)
+{
+       struct adapter *adap;
+
+       if (type >= CXGB4_PCI_ULD_MAX)
+               return -EINVAL;
+
+       mutex_lock(&uld_mutex);
+       list_for_each_entry(adap, &adapter_list, list_node) {
+               if (!is_pci_uld(adap))
+                       continue;
+               adap->uld[type].handle = NULL;
+               adap->uld[type].add = NULL;
+               if (adap->flags & FULL_INIT_DONE)
+                       quiesce_rx_uld(adap, type);
+               if (adap->flags & USING_MSIX)
+                       free_msix_queue_irqs_uld(adap, type);
+               free_sge_queues_uld(adap, type);
+               free_queues_uld(adap, type);
+       }
+       mutex_unlock(&uld_mutex);
+
+       return 0;
+}
+EXPORT_SYMBOL(cxgb4_unregister_pci_uld);
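+
+/* Registration sketch (hypothetical ULD; the my_* symbols are
+ * illustrative only): a lookaside crypto driver would fill a
+ * cxgb4_pci_uld_info with its queue sizes and callbacks and register
+ * on module init:
+ *
+ *     static struct cxgb4_pci_uld_info my_uld_info = {
+ *             .name           = "my-uld",
+ *             .nrxq           = 4,
+ *             .rxq_size       = 1024,
+ *             .add            = my_add,
+ *             .rx_handler     = my_rx_handler,
+ *             .state_change   = my_state_change,
+ *     };
+ *     err = cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &my_uld_info);
+ *
+ * mirrored by cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1) on module exit.
+ */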
index f3c58aa..ab40372 100644 (file)
@@ -32,8 +32,8 @@
  * SOFTWARE.
  */
 
-#ifndef __CXGB4_OFLD_H
-#define __CXGB4_OFLD_H
+#ifndef __CXGB4_ULD_H
+#define __CXGB4_ULD_H
 
 #include <linux/cache.h>
 #include <linux/spinlock.h>
@@ -296,8 +296,36 @@ struct cxgb4_uld_info {
        void (*lro_flush)(struct t4_lro_mgr *);
 };
 
+enum cxgb4_pci_uld {
+       CXGB4_PCI_ULD1,
+       CXGB4_PCI_ULD_MAX
+};
+
+struct cxgb4_pci_uld_info {
+       const char *name;
+       bool lro;
+       void *handle;
+       unsigned int nrxq;
+       unsigned int nciq;
+       unsigned int rxq_size;
+       unsigned int ciq_size;
+       void *(*add)(const struct cxgb4_lld_info *p);
+       int (*rx_handler)(void *handle, const __be64 *rsp,
+                         const struct pkt_gl *gl);
+       int (*state_change)(void *handle, enum cxgb4_state new_state);
+       int (*control)(void *handle, enum cxgb4_control control, ...);
+       int (*lro_rx_handler)(void *handle, const __be64 *rsp,
+                             const struct pkt_gl *gl,
+                             struct t4_lro_mgr *lro_mgr,
+                             struct napi_struct *napi);
+       void (*lro_flush)(struct t4_lro_mgr *);
+};
+
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
+int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
+                          struct cxgb4_pci_uld_info *p);
+int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
@@ -330,4 +358,4 @@ int cxgb4_bar2_sge_qregs(struct net_device *dev,
                         u64 *pbar2_qoffset,
                         unsigned int *pbar2_qid);
 
-#endif  /* !__CXGB4_OFLD_H */
+#endif  /* !__CXGB4_ULD_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
new file mode 100644 (file)
index 0000000..539de76
--- /dev/null
@@ -0,0 +1,556 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "cxgb4.h"
+#include "sched.h"
+
+/* Spinlock must be held by caller */
+static int t4_sched_class_fw_cmd(struct port_info *pi,
+                                struct ch_sched_params *p,
+                                enum sched_fw_ops op)
+{
+       struct adapter *adap = pi->adapter;
+       struct sched_table *s = pi->sched_tbl;
+       struct sched_class *e;
+       int err = 0;
+
+       e = &s->tab[p->u.params.class];
+       switch (op) {
+       case SCHED_FW_OP_ADD:
+               err = t4_sched_params(adap, p->type,
+                                     p->u.params.level, p->u.params.mode,
+                                     p->u.params.rateunit,
+                                     p->u.params.ratemode,
+                                     p->u.params.channel, e->idx,
+                                     p->u.params.minrate, p->u.params.maxrate,
+                                     p->u.params.weight, p->u.params.pktsize);
+               break;
+       default:
+               err = -ENOTSUPP;
+               break;
+       }
+
+       return err;
+}
+
+/* Spinlock must be held by caller */
+static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg,
+                                  enum sched_bind_type type, bool bind)
+{
+       struct adapter *adap = pi->adapter;
+       u32 fw_mnem, fw_class, fw_param;
+       unsigned int pf = adap->pf;
+       unsigned int vf = 0;
+       int err = 0;
+
+       switch (type) {
+       case SCHED_QUEUE: {
+               struct sched_queue_entry *qe;
+
+               qe = (struct sched_queue_entry *)arg;
+
+               /* Create a template for the FW_PARAMS_CMD mnemonic and
+                * value (TX Scheduling Class in this case).
+                */
+               fw_mnem = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+                          FW_PARAMS_PARAM_X_V(
+                                  FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
+               fw_class = bind ? qe->param.class : FW_SCHED_CLS_NONE;
+               fw_param = (fw_mnem | FW_PARAMS_PARAM_YZ_V(qe->cntxt_id));
+
+               pf = adap->pf;
+               vf = 0;
+               break;
+       }
+       default:
+               err = -ENOTSUPP;
+               goto out;
+       }
+
+       err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class);
+
+out:
+       return err;
+}
+
+static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
+                                                const unsigned int qid,
+                                                int *index)
+{
+       struct sched_table *s = pi->sched_tbl;
+       struct sched_class *e, *end;
+       struct sched_class *found = NULL;
+       int i;
+
+       /* Look for a class with matching bound queue parameters */
+       end = &s->tab[s->sched_size];
+       for (e = &s->tab[0]; e != end; ++e) {
+               struct sched_queue_entry *qe;
+
+               i = 0;
+               if (e->state == SCHED_STATE_UNUSED)
+                       continue;
+
+               list_for_each_entry(qe, &e->queue_list, list) {
+                       if (qe->cntxt_id == qid) {
+                               found = e;
+                               if (index)
+                                       *index = i;
+                               break;
+                       }
+                       i++;
+               }
+
+               if (found)
+                       break;
+       }
+
+       return found;
+}
+
+static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
+{
+       struct adapter *adap = pi->adapter;
+       struct sched_class *e;
+       struct sched_queue_entry *qe = NULL;
+       struct sge_eth_txq *txq;
+       unsigned int qid;
+       int index = -1;
+       int err = 0;
+
+       if (p->queue < 0 || p->queue >= pi->nqsets)
+               return -ERANGE;
+
+       txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
+       qid = txq->q.cntxt_id;
+
+       /* Find the existing class that the queue is bound to */
+       e = t4_sched_queue_lookup(pi, qid, &index);
+       if (e && index >= 0) {
+               int i = 0;
+
+               spin_lock(&e->lock);
+               list_for_each_entry(qe, &e->queue_list, list) {
+                       if (i == index)
+                               break;
+                       i++;
+               }
+               err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE,
+                                             false);
+               if (err) {
+                       spin_unlock(&e->lock);
+                       goto out;
+               }
+
+               list_del(&qe->list);
+               t4_free_mem(qe);
+               if (atomic_dec_and_test(&e->refcnt)) {
+                       e->state = SCHED_STATE_UNUSED;
+                       memset(&e->info, 0, sizeof(e->info));
+               }
+               spin_unlock(&e->lock);
+       }
+out:
+       return err;
+}
+
+static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
+{
+       struct adapter *adap = pi->adapter;
+       struct sched_table *s = pi->sched_tbl;
+       struct sched_class *e;
+       struct sched_queue_entry *qe = NULL;
+       struct sge_eth_txq *txq;
+       unsigned int qid;
+       int err = 0;
+
+       if (p->queue < 0 || p->queue >= pi->nqsets)
+               return -ERANGE;
+
+       qe = t4_alloc_mem(sizeof(struct sched_queue_entry));
+       if (!qe)
+               return -ENOMEM;
+
+       txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
+       qid = txq->q.cntxt_id;
+
+       /* Unbind queue from any existing class */
+       err = t4_sched_queue_unbind(pi, p);
+       if (err)
+               goto out;
+
+       /* Bind queue to specified class */
+       memset(qe, 0, sizeof(*qe));
+       qe->cntxt_id = qid;
+       memcpy(&qe->param, p, sizeof(qe->param));
+
+       e = &s->tab[qe->param.class];
+       spin_lock(&e->lock);
+       err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true);
+       if (err) {
+               t4_free_mem(qe);
+               spin_unlock(&e->lock);
+               goto out;
+       }
+
+       list_add_tail(&qe->list, &e->queue_list);
+       atomic_inc(&e->refcnt);
+       spin_unlock(&e->lock);
+out:
+       return err;
+}
+
+static void t4_sched_class_unbind_all(struct port_info *pi,
+                                     struct sched_class *e,
+                                     enum sched_bind_type type)
+{
+       if (!e)
+               return;
+
+       switch (type) {
+       case SCHED_QUEUE: {
+               struct sched_queue_entry *qe;
+
+               list_for_each_entry(qe, &e->queue_list, list)
+                       t4_sched_queue_unbind(pi, &qe->param);
+               break;
+       }
+       default:
+               break;
+       }
+}
+
+static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg,
+                                        enum sched_bind_type type, bool bind)
+{
+       int err = 0;
+
+       if (!arg)
+               return -EINVAL;
+
+       switch (type) {
+       case SCHED_QUEUE: {
+               struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+               if (bind)
+                       err = t4_sched_queue_bind(pi, qe);
+               else
+                       err = t4_sched_queue_unbind(pi, qe);
+               break;
+       }
+       default:
+               err = -ENOTSUPP;
+               break;
+       }
+
+       return err;
+}
+
+/**
+ * cxgb4_sched_class_bind - Bind an entity to a scheduling class
+ * @dev: net_device pointer
+ * @arg: Entity opaque data
+ * @type: Entity type (Queue)
+ *
+ * Binds an entity (queue) to a scheduling class.  If the entity
+ * is bound to another class, it will be unbound from the other class
+ * and bound to the class specified in @arg.
+ */
+int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
+                          enum sched_bind_type type)
+{
+       struct port_info *pi = netdev2pinfo(dev);
+       struct sched_table *s;
+       int err = 0;
+       u8 class_id;
+
+       if (!can_sched(dev))
+               return -ENOTSUPP;
+
+       if (!arg)
+               return -EINVAL;
+
+       switch (type) {
+       case SCHED_QUEUE: {
+               struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+               class_id = qe->class;
+               break;
+       }
+       default:
+               return -ENOTSUPP;
+       }
+
+       if (!valid_class_id(dev, class_id))
+               return -EINVAL;
+
+       if (class_id == SCHED_CLS_NONE)
+               return -ENOTSUPP;
+
+       s = pi->sched_tbl;
+       write_lock(&s->rw_lock);
+       err = t4_sched_class_bind_unbind_op(pi, arg, type, true);
+       write_unlock(&s->rw_lock);
+
+       return err;
+}
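+
+/* Usage sketch (illustrative): binding TX queue 2 to class 5 on a port
+ * amounts to filling a ch_sched_queue and calling the helper:
+ *
+ *     struct ch_sched_queue qe = { .queue = 2, .class = 5 };
+ *     err = cxgb4_sched_class_bind(dev, &qe, SCHED_QUEUE);
+ *
+ * cxgb_set_tx_maxrate() in cxgb4_main.c follows exactly this pattern
+ * after allocating a class with cxgb4_sched_class_alloc().
+ */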
+
+/**
+ * cxgb4_sched_class_unbind - Unbind an entity from a scheduling class
+ * @dev: net_device pointer
+ * @arg: Entity opaque data
+ * @type: Entity type (Queue)
+ *
+ * Unbinds an entity (queue) from a scheduling class.
+ */
+int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
+                            enum sched_bind_type type)
+{
+       struct port_info *pi = netdev2pinfo(dev);
+       struct sched_table *s;
+       int err = 0;
+       u8 class_id;
+
+       if (!can_sched(dev))
+               return -ENOTSUPP;
+
+       if (!arg)
+               return -EINVAL;
+
+       switch (type) {
+       case SCHED_QUEUE: {
+               struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+               class_id = qe->class;
+               break;
+       }
+       default:
+               return -ENOTSUPP;
+       }
+
+       if (!valid_class_id(dev, class_id))
+               return -EINVAL;
+
+       s = pi->sched_tbl;
+       write_lock(&s->rw_lock);
+       err = t4_sched_class_bind_unbind_op(pi, arg, type, false);
+       write_unlock(&s->rw_lock);
+
+       return err;
+}
+
+/* If @p is NULL, fetch any available unused class */
+static struct sched_class *t4_sched_class_lookup(struct port_info *pi,
+                                               const struct ch_sched_params *p)
+{
+       struct sched_table *s = pi->sched_tbl;
+       struct sched_class *e, *end;
+       struct sched_class *found = NULL;
+
+       if (!p) {
+               /* Get any available unused class */
+               end = &s->tab[s->sched_size];
+               for (e = &s->tab[0]; e != end; ++e) {
+                       if (e->state == SCHED_STATE_UNUSED) {
+                               found = e;
+                               break;
+                       }
+               }
+       } else {
+               /* Look for a class with matching scheduling parameters */
+               struct ch_sched_params info;
+               struct ch_sched_params tp;
+
+               memset(&info, 0, sizeof(info));
+               memset(&tp, 0, sizeof(tp));
+
+               memcpy(&tp, p, sizeof(tp));
+               /* Don't try to match class parameter */
+               tp.u.params.class = SCHED_CLS_NONE;
+
+               end = &s->tab[s->sched_size];
+               for (e = &s->tab[0]; e != end; ++e) {
+                       if (e->state == SCHED_STATE_UNUSED)
+                               continue;
+
+                       memset(&info, 0, sizeof(info));
+                       memcpy(&info, &e->info, sizeof(info));
+                       /* Don't try to match class parameter */
+                       info.u.params.class = SCHED_CLS_NONE;
+
+                       if ((info.type == tp.type) &&
+                           (!memcmp(&info.u.params, &tp.u.params,
+                                    sizeof(info.u.params)))) {
+                               found = e;
+                               break;
+                       }
+               }
+       }
+
+       return found;
+}
+
+static struct sched_class *t4_sched_class_alloc(struct port_info *pi,
+                                               struct ch_sched_params *p)
+{
+       struct sched_table *s = pi->sched_tbl;
+       struct sched_class *e;
+       u8 class_id;
+       int err;
+
+       if (!p)
+               return NULL;
+
+       class_id = p->u.params.class;
+
+       /* Only accept a search for an existing class with matching params
+        * or the allocation of a new class with the specified params
+        */
+       if (class_id != SCHED_CLS_NONE)
+               return NULL;
+
+       write_lock(&s->rw_lock);
+       /* See if there's an existing class with the same
+        * requested sched params
+        */
+       e = t4_sched_class_lookup(pi, p);
+       if (!e) {
+               struct ch_sched_params np;
+
+               /* Fetch any available unused class */
+               e = t4_sched_class_lookup(pi, NULL);
+               if (!e)
+                       goto out;
+
+               memcpy(&np, p, sizeof(np));
+               np.u.params.class = e->idx;
+
+               spin_lock(&e->lock);
+               /* New class */
+               err = t4_sched_class_fw_cmd(pi, &np, SCHED_FW_OP_ADD);
+               if (err) {
+                       spin_unlock(&e->lock);
+                       e = NULL;
+                       goto out;
+               }
+               memcpy(&e->info, &np, sizeof(e->info));
+               atomic_set(&e->refcnt, 0);
+               e->state = SCHED_STATE_ACTIVE;
+               spin_unlock(&e->lock);
+       }
+
+out:
+       write_unlock(&s->rw_lock);
+       return e;
+}
+
+/**
+ * cxgb4_sched_class_alloc - allocate a scheduling class
+ * @dev: net_device pointer
+ * @p: new scheduling class to create.
+ *
+ * Returns a pointer to the scheduling class.  @p must be non-NULL and
+ * must request class SCHED_CLS_NONE ("pick one for me").  If a class
+ * with scheduling parameters matching @p already exists, that class is
+ * returned; otherwise an unused class is allocated and initialized.
+ */
+struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
+                                           struct ch_sched_params *p)
+{
+       struct port_info *pi = netdev2pinfo(dev);
+       u8 class_id;
+
+       if (!p || !can_sched(dev))
+               return NULL;
+
+       class_id = p->u.params.class;
+       if (!valid_class_id(dev, class_id))
+               return NULL;
+
+       return t4_sched_class_alloc(pi, p);
+}
+
+static void t4_sched_class_free(struct port_info *pi, struct sched_class *e)
+{
+       t4_sched_class_unbind_all(pi, e, SCHED_QUEUE);
+}
+
+struct sched_table *t4_init_sched(unsigned int sched_size)
+{
+       struct sched_table *s;
+       unsigned int i;
+
+       s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class));
+       if (!s)
+               return NULL;
+
+       s->sched_size = sched_size;
+       rwlock_init(&s->rw_lock);
+
+       for (i = 0; i < s->sched_size; i++) {
+               memset(&s->tab[i], 0, sizeof(struct sched_class));
+               s->tab[i].idx = i;
+               s->tab[i].state = SCHED_STATE_UNUSED;
+               INIT_LIST_HEAD(&s->tab[i].queue_list);
+               spin_lock_init(&s->tab[i].lock);
+               atomic_set(&s->tab[i].refcnt, 0);
+       }
+       return s;
+}
+
+void t4_cleanup_sched(struct adapter *adap)
+{
+       struct sched_table *s;
+       unsigned int i, j;
+
+       for_each_port(adap, j) {
+               struct port_info *pi = netdev2pinfo(adap->port[j]);
+
+               s = pi->sched_tbl;
+               for (i = 0; i < s->sched_size; i++) {
+                       struct sched_class *e;
+
+                       write_lock(&s->rw_lock);
+                       e = &s->tab[i];
+                       if (e->state == SCHED_STATE_ACTIVE)
+                               t4_sched_class_free(pi, e);
+                       write_unlock(&s->rw_lock);
+               }
+               t4_free_mem(s);
+       }
+}
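
For reference, a minimal sketch of how a caller might drive the class API
above. It is hedged: the helper name, the 1 Gbps rate and queue index 0 are
illustrative assumptions, while struct ch_sched_params, struct ch_sched_queue
and the SCHED_CLASS_* enum values are assumed from cxgb4.h in this series.

	/* Sketch: rate-limit Tx queue 0 to 1 Gbps via a scheduling class */
	static int example_rate_limit_txq(struct net_device *dev)
	{
		struct ch_sched_params p;
		struct ch_sched_queue qe;
		struct sched_class *e;

		memset(&p, 0, sizeof(p));
		p.type = SCHED_CLASS_TYPE_PACKET;
		p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
		p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
		p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
		p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
		p.u.params.class    = SCHED_CLS_NONE;	/* let the driver pick */
		p.u.params.maxrate  = 1000000;		/* 1 Gbps, in Kbps */

		e = cxgb4_sched_class_alloc(dev, &p);
		if (!e)
			return -ENOMEM;

		memset(&qe, 0, sizeof(qe));
		qe.queue = 0;		/* illustrative Tx queue index */
		qe.class = e->idx;
		return cxgb4_sched_class_bind(dev, &qe, SCHED_QUEUE);
	}
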
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
new file mode 100644 (file)
index 0000000..77b2b3f
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SCHED_H
+#define __CXGB4_SCHED_H
+
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#define SCHED_CLS_NONE 0xff
+
+#define FW_SCHED_CLS_NONE 0xffffffff
+
+/* Max rate that can be set for a scheduling class is 10 Gbps */
+#define SCHED_MAX_RATE_KBPS 10000000U
+
+enum {
+       SCHED_STATE_ACTIVE,
+       SCHED_STATE_UNUSED,
+};
+
+enum sched_fw_ops {
+       SCHED_FW_OP_ADD,
+};
+
+enum sched_bind_type {
+       SCHED_QUEUE,
+};
+
+struct sched_queue_entry {
+       struct list_head list;
+       unsigned int cntxt_id;
+       struct ch_sched_queue param;
+};
+
+struct sched_class {
+       u8 state;
+       u8 idx;
+       struct ch_sched_params info;
+       struct list_head queue_list;
+       spinlock_t lock; /* Per class lock */
+       atomic_t refcnt;
+};
+
+struct sched_table {      /* per port scheduling table */
+       u8 sched_size;
+       rwlock_t rw_lock; /* Table lock */
+       struct sched_class tab[0];
+};
+
+static inline bool can_sched(struct net_device *dev)
+{
+       struct port_info *pi = netdev2pinfo(dev);
+
+       return pi->sched_tbl != NULL;
+}
+
+static inline bool valid_class_id(struct net_device *dev, u8 class_id)
+{
+       struct port_info *pi = netdev2pinfo(dev);
+
+       if (class_id >= pi->sched_tbl->sched_size &&
+           class_id != SCHED_CLS_NONE)
+               return false;
+
+       return true;
+}
+
+int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
+                          enum sched_bind_type type);
+int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
+                            enum sched_bind_type type);
+
+struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
+                                           struct ch_sched_params *p);
+
+struct sched_table *t4_init_sched(unsigned int size);
+void t4_cleanup_sched(struct adapter *adap);
+#endif  /* __CXGB4_SCHED_H */
index ad3552d..9a607db 100644 (file)
@@ -2928,8 +2928,8 @@ static void free_txq(struct adapter *adap, struct sge_txq *q)
        q->desc = NULL;
 }
 
-static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
-                        struct sge_fl *fl)
+void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
+                 struct sge_fl *fl)
 {
        struct sge *s = &adap->sge;
        unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
index 2a476cc..15be543 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -2729,7 +2729,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
 
 out:
        vfree(vpd);
-       return ret;
+       return ret < 0 ? ret : 0;
 }
 
 /**
@@ -8305,3 +8305,32 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
 
        return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
 }
+
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+                   int rateunit, int ratemode, int channel, int class,
+                   int minrate, int maxrate, int weight, int pktsize)
+{
+       struct fw_sched_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) |
+                                     FW_CMD_REQUEST_F |
+                                     FW_CMD_WRITE_F);
+       cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
+
+       cmd.u.params.sc = FW_SCHED_SC_PARAMS;
+       cmd.u.params.type = type;
+       cmd.u.params.level = level;
+       cmd.u.params.mode = mode;
+       cmd.u.params.ch = channel;
+       cmd.u.params.cl = class;
+       cmd.u.params.unit = rateunit;
+       cmd.u.params.rate = ratemode;
+       cmd.u.params.min = cpu_to_be32(minrate);
+       cmd.u.params.max = cpu_to_be32(maxrate);
+       cmd.u.params.weight = cpu_to_be16(weight);
+       cmd.u.params.pktsize = cpu_to_be16(pktsize);
+
+       return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
+                              NULL, 1);
+}
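
t4_sched_class_fw_cmd() in sched.c funnels class parameters into this mailbox
helper. As a hedged illustration only -- channel 0, class 0 and the 500 Mbps
cap are made-up values, adap is the caller's adapter handle, and the
SCHED_CLASS_* enums are assumed from cxgb4.h in this series -- a direct call
programming an absolute class rate limit would look like:

	int err;

	err = t4_sched_params(adap, SCHED_CLASS_TYPE_PACKET,
			      SCHED_CLASS_LEVEL_CL_RL, SCHED_CLASS_MODE_CLASS,
			      SCHED_CLASS_RATEUNIT_BITS, SCHED_CLASS_RATEMODE_ABS,
			      0 /* channel */, 0 /* class */, 0 /* minrate */,
			      500000 /* maxrate, Kbps */, 0 /* weight */,
			      0 /* pktsize */);
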
index e0ebe13..fba3b2a 100644 (file)
@@ -61,6 +61,7 @@ enum {
        CPL_ABORT_REQ_RSS     = 0x2B,
        CPL_ABORT_RPL_RSS     = 0x2D,
 
+       CPL_RX_PHYS_ADDR      = 0x30,
        CPL_CLOSE_CON_RPL     = 0x32,
        CPL_ISCSI_HDR         = 0x33,
        CPL_RDMA_CQE          = 0x35,
@@ -83,6 +84,10 @@ enum {
        CPL_PASS_OPEN_REQ6    = 0x81,
        CPL_ACT_OPEN_REQ6     = 0x83,
 
+       CPL_TX_TLS_PDU        = 0x88,
+       CPL_TX_SEC_PDU        = 0x8A,
+       CPL_TX_TLS_ACK        = 0x8B,
+
        CPL_RDMA_TERMINATE    = 0xA2,
        CPL_RDMA_WRITE        = 0xA4,
        CPL_SGE_EGR_UPDATE    = 0xA5,
@@ -94,6 +99,8 @@ enum {
        CPL_FW4_PLD           = 0xC1,
        CPL_FW4_ACK           = 0xC3,
 
+       CPL_RX_PHYS_DSGL      = 0xD0,
+
        CPL_FW6_MSG           = 0xE0,
        CPL_FW6_PLD           = 0xE1,
        CPL_TX_PKT_LSO        = 0xED,
@@ -1362,6 +1369,15 @@ struct ulptx_idata {
        __be32 len;
 };
 
+struct ulp_txpkt {
+       __be32 cmd_dest;
+       __be32 len;
+};
+
+#define ULPTX_CMD_S    24
+#define ULPTX_CMD_M    0xFF
+#define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S)
+
 #define ULPTX_NSGE_S    0
 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
 
@@ -1369,6 +1385,22 @@ struct ulptx_idata {
 #define ULPTX_MORE_V(x)        ((x) << ULPTX_MORE_S)
 #define ULPTX_MORE_F   ULPTX_MORE_V(1U)
 
+#define ULP_TXPKT_DEST_S    16
+#define ULP_TXPKT_DEST_M    0x3
+#define ULP_TXPKT_DEST_V(x) ((x) << ULP_TXPKT_DEST_S)
+
+#define ULP_TXPKT_FID_S     4
+#define ULP_TXPKT_FID_M     0x7ff
+#define ULP_TXPKT_FID_V(x)  ((x) << ULP_TXPKT_FID_S)
+
+#define ULP_TXPKT_RO_S      3
+#define ULP_TXPKT_RO_V(x) ((x) << ULP_TXPKT_RO_S)
+#define ULP_TXPKT_RO_F ULP_TXPKT_RO_V(1U)
+
+#define ULP_TX_SC_MORE_S 23
+#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S)
+#define ULP_TX_SC_MORE_F  ULP_TX_SC_MORE_V(1U)
+
 struct ulp_mem_io {
        WR_HDR;
        __be32 cmd;
@@ -1406,4 +1438,409 @@ struct ulp_mem_io {
 #define ULP_MEMIO_DATA_LEN_S    0
 #define ULP_MEMIO_DATA_LEN_V(x) ((x) << ULP_MEMIO_DATA_LEN_S)
 
+#define ULPTX_NSGE_M    0xFFFF
+#define ULPTX_NSGE_G(x) (((x) >> ULPTX_NSGE_S) & ULPTX_NSGE_M)
+
+struct ulptx_sc_memrd {
+       __be32 cmd_to_len;
+       __be32 addr;
+};
+
+#define ULP_TXPKT_DATAMODIFY_S       23
+#define ULP_TXPKT_DATAMODIFY_M       0x1
+#define ULP_TXPKT_DATAMODIFY_V(x)    ((x) << ULP_TXPKT_DATAMODIFY_S)
+#define ULP_TXPKT_DATAMODIFY_G(x)    \
+       (((x) >> ULP_TXPKT_DATAMODIFY_S) & ULP_TXPKT_DATAMODIFY_M)
+#define ULP_TXPKT_DATAMODIFY_F       ULP_TXPKT_DATAMODIFY_V(1U)
+
+#define ULP_TXPKT_CHANNELID_S        22
+#define ULP_TXPKT_CHANNELID_M        0x1
+#define ULP_TXPKT_CHANNELID_V(x)     ((x) << ULP_TXPKT_CHANNELID_S)
+#define ULP_TXPKT_CHANNELID_G(x)     \
+       (((x) >> ULP_TXPKT_CHANNELID_S) & ULP_TXPKT_CHANNELID_M)
+#define ULP_TXPKT_CHANNELID_F        ULP_TXPKT_CHANNELID_V(1U)
+
+#define SCMD_SEQ_NO_CTRL_S      29
+#define SCMD_SEQ_NO_CTRL_M      0x3
+#define SCMD_SEQ_NO_CTRL_V(x)   ((x) << SCMD_SEQ_NO_CTRL_S)
+#define SCMD_SEQ_NO_CTRL_G(x)   \
+       (((x) >> SCMD_SEQ_NO_CTRL_S) & SCMD_SEQ_NO_CTRL_M)
+
+/* StsFieldPrsnt - Status field at the end of the TLS PDU */
+#define SCMD_STATUS_PRESENT_S   28
+#define SCMD_STATUS_PRESENT_M   0x1
+#define SCMD_STATUS_PRESENT_V(x)    ((x) << SCMD_STATUS_PRESENT_S)
+#define SCMD_STATUS_PRESENT_G(x)    \
+       (((x) >> SCMD_STATUS_PRESENT_S) & SCMD_STATUS_PRESENT_M)
+#define SCMD_STATUS_PRESENT_F   SCMD_STATUS_PRESENT_V(1U)
+
+/* ProtoVersion - Protocol Version. 0:TLS 1.2, 1:TLS 1.1, 2:DTLS, 3:Generic,
+ * 4-15: Reserved.
+ */
+#define SCMD_PROTO_VERSION_S    24
+#define SCMD_PROTO_VERSION_M    0xf
+#define SCMD_PROTO_VERSION_V(x) ((x) << SCMD_PROTO_VERSION_S)
+#define SCMD_PROTO_VERSION_G(x) \
+       (((x) >> SCMD_PROTO_VERSION_S) & SCMD_PROTO_VERSION_M)
+
+/* EncDecCtrl - Encryption/Decryption Control. 0: Encrypt, 1: Decrypt */
+#define SCMD_ENC_DEC_CTRL_S     23
+#define SCMD_ENC_DEC_CTRL_M     0x1
+#define SCMD_ENC_DEC_CTRL_V(x)  ((x) << SCMD_ENC_DEC_CTRL_S)
+#define SCMD_ENC_DEC_CTRL_G(x)  \
+       (((x) >> SCMD_ENC_DEC_CTRL_S) & SCMD_ENC_DEC_CTRL_M)
+#define SCMD_ENC_DEC_CTRL_F SCMD_ENC_DEC_CTRL_V(1U)
+
+/* CipherAuthSeqCtrl - Cipher Authentication Sequence Control. */
+#define SCMD_CIPH_AUTH_SEQ_CTRL_S       22
+#define SCMD_CIPH_AUTH_SEQ_CTRL_M       0x1
+#define SCMD_CIPH_AUTH_SEQ_CTRL_V(x)    \
+       ((x) << SCMD_CIPH_AUTH_SEQ_CTRL_S)
+#define SCMD_CIPH_AUTH_SEQ_CTRL_G(x)    \
+       (((x) >> SCMD_CIPH_AUTH_SEQ_CTRL_S) & SCMD_CIPH_AUTH_SEQ_CTRL_M)
+#define SCMD_CIPH_AUTH_SEQ_CTRL_F   SCMD_CIPH_AUTH_SEQ_CTRL_V(1U)
+
+/* CiphMode -  Cipher Mode. 0: NOP, 1:AES-CBC, 2:AES-GCM, 3:AES-CTR,
+ * 4:Generic-AES, 5-15: Reserved.
+ */
+#define SCMD_CIPH_MODE_S    18
+#define SCMD_CIPH_MODE_M    0xf
+#define SCMD_CIPH_MODE_V(x) ((x) << SCMD_CIPH_MODE_S)
+#define SCMD_CIPH_MODE_G(x) \
+       (((x) >> SCMD_CIPH_MODE_S) & SCMD_CIPH_MODE_M)
+
+/* AuthMode - Auth Mode. 0: NOP, 1:SHA1, 2:SHA2-224, 3:SHA2-256
+ * 4-15: Reserved
+ */
+#define SCMD_AUTH_MODE_S    14
+#define SCMD_AUTH_MODE_M    0xf
+#define SCMD_AUTH_MODE_V(x) ((x) << SCMD_AUTH_MODE_S)
+#define SCMD_AUTH_MODE_G(x) \
+       (((x) >> SCMD_AUTH_MODE_S) & SCMD_AUTH_MODE_M)
+
+/* HmacCtrl - HMAC Control. 0:NOP, 1:No truncation, 2:Support HMAC Truncation
+ * per RFC 4366, 3:IPSec 96 bits, 4-7:Reserved
+ */
+#define SCMD_HMAC_CTRL_S    11
+#define SCMD_HMAC_CTRL_M    0x7
+#define SCMD_HMAC_CTRL_V(x) ((x) << SCMD_HMAC_CTRL_S)
+#define SCMD_HMAC_CTRL_G(x) \
+       (((x) >> SCMD_HMAC_CTRL_S) & SCMD_HMAC_CTRL_M)
+
+/* IvSize - IV size in units of 2 bytes */
+#define SCMD_IV_SIZE_S  7
+#define SCMD_IV_SIZE_M  0xf
+#define SCMD_IV_SIZE_V(x)   ((x) << SCMD_IV_SIZE_S)
+#define SCMD_IV_SIZE_G(x)   \
+       (((x) >> SCMD_IV_SIZE_S) & SCMD_IV_SIZE_M)
+
+/* NumIVs - Number of IVs */
+#define SCMD_NUM_IVS_S  0
+#define SCMD_NUM_IVS_M  0x7f
+#define SCMD_NUM_IVS_V(x)   ((x) << SCMD_NUM_IVS_S)
+#define SCMD_NUM_IVS_G(x)   \
+       (((x) >> SCMD_NUM_IVS_S) & SCMD_NUM_IVS_M)
+
+/* EnbDbgId - If this is enabled, the upper 20 bits (63:44) of SeqNumber
+ * (below) are used as the Cid (connection id for debug status); these
+ * bits are padded with zeroes when forming the 64-bit TLS
+ * sequence number.
+ */
+#define SCMD_ENB_DBGID_S  31
+#define SCMD_ENB_DBGID_M  0x1
+#define SCMD_ENB_DBGID_V(x)   ((x) << SCMD_ENB_DBGID_S)
+#define SCMD_ENB_DBGID_G(x)   \
+       (((x) >> SCMD_ENB_DBGID_S) & SCMD_ENB_DBGID_M)
+
+/* IV generation in SW. */
+#define SCMD_IV_GEN_CTRL_S      30
+#define SCMD_IV_GEN_CTRL_M      0x1
+#define SCMD_IV_GEN_CTRL_V(x)   ((x) << SCMD_IV_GEN_CTRL_S)
+#define SCMD_IV_GEN_CTRL_G(x)   \
+       (((x) >> SCMD_IV_GEN_CTRL_S) & SCMD_IV_GEN_CTRL_M)
+#define SCMD_IV_GEN_CTRL_F  SCMD_IV_GEN_CTRL_V(1U)
+
+/* More frags */
+#define SCMD_MORE_FRAGS_S   20
+#define SCMD_MORE_FRAGS_M   0x1
+#define SCMD_MORE_FRAGS_V(x)    ((x) << SCMD_MORE_FRAGS_S)
+#define SCMD_MORE_FRAGS_G(x)    (((x) >> SCMD_MORE_FRAGS_S) & SCMD_MORE_FRAGS_M)
+
+/* last frag */
+#define SCMD_LAST_FRAG_S    19
+#define SCMD_LAST_FRAG_M    0x1
+#define SCMD_LAST_FRAG_V(x) ((x) << SCMD_LAST_FRAG_S)
+#define SCMD_LAST_FRAG_G(x) (((x) >> SCMD_LAST_FRAG_S) & SCMD_LAST_FRAG_M)
+
+/* TlsCompPdu */
+#define SCMD_TLS_COMPPDU_S    18
+#define SCMD_TLS_COMPPDU_M    0x1
+#define SCMD_TLS_COMPPDU_V(x) ((x) << SCMD_TLS_COMPPDU_S)
+#define SCMD_TLS_COMPPDU_G(x) (((x) >> SCMD_TLS_COMPPDU_S) & SCMD_TLS_COMPPDU_M)
+
+/* KeyCntxtInline - Key context inline after the scmd OR PayloadOnly */
+#define SCMD_KEY_CTX_INLINE_S   17
+#define SCMD_KEY_CTX_INLINE_M   0x1
+#define SCMD_KEY_CTX_INLINE_V(x)    ((x) << SCMD_KEY_CTX_INLINE_S)
+#define SCMD_KEY_CTX_INLINE_G(x)    \
+       (((x) >> SCMD_KEY_CTX_INLINE_S) & SCMD_KEY_CTX_INLINE_M)
+#define SCMD_KEY_CTX_INLINE_F   SCMD_KEY_CTX_INLINE_V(1U)
+
+/* TLSFragEnable - 0: Host created TLS PDUs, 1: TLS Fragmentation in ASIC */
+#define SCMD_TLS_FRAG_ENABLE_S  16
+#define SCMD_TLS_FRAG_ENABLE_M  0x1
+#define SCMD_TLS_FRAG_ENABLE_V(x)   ((x) << SCMD_TLS_FRAG_ENABLE_S)
+#define SCMD_TLS_FRAG_ENABLE_G(x)   \
+       (((x) >> SCMD_TLS_FRAG_ENABLE_S) & SCMD_TLS_FRAG_ENABLE_M)
+#define SCMD_TLS_FRAG_ENABLE_F  SCMD_TLS_FRAG_ENABLE_V(1U)
+
+/* MacOnly - Only send the MAC and discard the PDU. This is valid for
+ * hash-only modes; in this case TLS_TX will drop the PDU and send
+ * back only the MAC bytes.
+ */
+#define SCMD_MAC_ONLY_S 15
+#define SCMD_MAC_ONLY_M 0x1
+#define SCMD_MAC_ONLY_V(x)  ((x) << SCMD_MAC_ONLY_S)
+#define SCMD_MAC_ONLY_G(x)  \
+       (((x) >> SCMD_MAC_ONLY_S) & SCMD_MAC_ONLY_M)
+#define SCMD_MAC_ONLY_F SCMD_MAC_ONLY_V(1U)
+
+/* AadIVDrop - Drop the AAD and IV fields. Useful in protocols
+ * which have complex AAD and IV formations, e.g. AES-CCM.
+ */
+#define SCMD_AADIVDROP_S 14
+#define SCMD_AADIVDROP_M 0x1
+#define SCMD_AADIVDROP_V(x)  ((x) << SCMD_AADIVDROP_S)
+#define SCMD_AADIVDROP_G(x)  \
+       (((x) >> SCMD_AADIVDROP_S) & SCMD_AADIVDROP_M)
+#define SCMD_AADIVDROP_F SCMD_AADIVDROP_V(1U)
+
+/* HdrLength - Length of all headers excluding TLS header
+ * present before start of crypto PDU/payload.
+ */
+#define SCMD_HDR_LEN_S  0
+#define SCMD_HDR_LEN_M  0x3fff
+#define SCMD_HDR_LEN_V(x)   ((x) << SCMD_HDR_LEN_S)
+#define SCMD_HDR_LEN_G(x)   \
+       (((x) >> SCMD_HDR_LEN_S) & SCMD_HDR_LEN_M)
+
+struct cpl_tx_sec_pdu {
+       __be32 op_ivinsrtofst;
+       __be32 pldlen;
+       __be32 aadstart_cipherstop_hi;
+       __be32 cipherstop_lo_authinsert;
+       __be32 seqno_numivs;
+       __be32 ivgen_hdrlen;
+       __be64 scmd1;
+};
+
+#define CPL_TX_SEC_PDU_OPCODE_S     24
+#define CPL_TX_SEC_PDU_OPCODE_M     0xff
+#define CPL_TX_SEC_PDU_OPCODE_V(x)  ((x) << CPL_TX_SEC_PDU_OPCODE_S)
+#define CPL_TX_SEC_PDU_OPCODE_G(x)  \
+       (((x) >> CPL_TX_SEC_PDU_OPCODE_S) & CPL_TX_SEC_PDU_OPCODE_M)
+
+/* RX Channel Id */
+#define CPL_TX_SEC_PDU_RXCHID_S  22
+#define CPL_TX_SEC_PDU_RXCHID_M  0x1
+#define CPL_TX_SEC_PDU_RXCHID_V(x)   ((x) << CPL_TX_SEC_PDU_RXCHID_S)
+#define CPL_TX_SEC_PDU_RXCHID_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_RXCHID_S) & CPL_TX_SEC_PDU_RXCHID_M)
+#define CPL_TX_SEC_PDU_RXCHID_F  CPL_TX_SEC_PDU_RXCHID_V(1U)
+
+/* Ack Follows */
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_S  21
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_M  0x1
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_V(x)   ((x) << CPL_TX_SEC_PDU_ACKFOLLOWS_S)
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_ACKFOLLOWS_S) & CPL_TX_SEC_PDU_ACKFOLLOWS_M)
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_F  CPL_TX_SEC_PDU_ACKFOLLOWS_V(1U)
+
+/* Loopback bit in cpl_tx_sec_pdu */
+#define CPL_TX_SEC_PDU_ULPTXLPBK_S  20
+#define CPL_TX_SEC_PDU_ULPTXLPBK_M  0x1
+#define CPL_TX_SEC_PDU_ULPTXLPBK_V(x)   ((x) << CPL_TX_SEC_PDU_ULPTXLPBK_S)
+#define CPL_TX_SEC_PDU_ULPTXLPBK_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_ULPTXLPBK_S) & CPL_TX_SEC_PDU_ULPTXLPBK_M)
+#define CPL_TX_SEC_PDU_ULPTXLPBK_F  CPL_TX_SEC_PDU_ULPTXLPBK_V(1U)
+
+/* Length of cpl header encapsulated */
+#define CPL_TX_SEC_PDU_CPLLEN_S     16
+#define CPL_TX_SEC_PDU_CPLLEN_M     0xf
+#define CPL_TX_SEC_PDU_CPLLEN_V(x)  ((x) << CPL_TX_SEC_PDU_CPLLEN_S)
+#define CPL_TX_SEC_PDU_CPLLEN_G(x)  \
+       (((x) >> CPL_TX_SEC_PDU_CPLLEN_S) & CPL_TX_SEC_PDU_CPLLEN_M)
+
+/* PlaceHolder */
+#define CPL_TX_SEC_PDU_PLACEHOLDER_S    10
+#define CPL_TX_SEC_PDU_PLACEHOLDER_M    0x1
+#define CPL_TX_SEC_PDU_PLACEHOLDER_V(x) ((x) << CPL_TX_SEC_PDU_PLACEHOLDER_S)
+#define CPL_TX_SEC_PDU_PLACEHOLDER_G(x) \
+       (((x) >> CPL_TX_SEC_PDU_PLACEHOLDER_S) & \
+        CPL_TX_SEC_PDU_PLACEHOLDER_M)
+
+/* IvInsrtOffset: Insertion location for IV */
+#define CPL_TX_SEC_PDU_IVINSRTOFST_S    0
+#define CPL_TX_SEC_PDU_IVINSRTOFST_M    0x3ff
+#define CPL_TX_SEC_PDU_IVINSRTOFST_V(x) ((x) << CPL_TX_SEC_PDU_IVINSRTOFST_S)
+#define CPL_TX_SEC_PDU_IVINSRTOFST_G(x) \
+       (((x) >> CPL_TX_SEC_PDU_IVINSRTOFST_S) & \
+        CPL_TX_SEC_PDU_IVINSRTOFST_M)
+
+/* AadStartOffset: Offset in bytes for AAD start from
+ * the first byte following the pkt headers (0-255 bytes)
+ */
+#define CPL_TX_SEC_PDU_AADSTART_S   24
+#define CPL_TX_SEC_PDU_AADSTART_M   0xff
+#define CPL_TX_SEC_PDU_AADSTART_V(x)    ((x) << CPL_TX_SEC_PDU_AADSTART_S)
+#define CPL_TX_SEC_PDU_AADSTART_G(x)    \
+       (((x) >> CPL_TX_SEC_PDU_AADSTART_S) & \
+        CPL_TX_SEC_PDU_AADSTART_M)
+
+/* AadStopOffset: offset in bytes for AAD stop/end from the first byte following
+ * the pkt headers (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_AADSTOP_S    15
+#define CPL_TX_SEC_PDU_AADSTOP_M    0x1ff
+#define CPL_TX_SEC_PDU_AADSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AADSTOP_S)
+#define CPL_TX_SEC_PDU_AADSTOP_G(x) \
+       (((x) >> CPL_TX_SEC_PDU_AADSTOP_S) & CPL_TX_SEC_PDU_AADSTOP_M)
+
+/* CipherStartOffset: offset in bytes for encryption/decryption start from the
+ * first byte following the pkt headers (0-1023 bytes)
+ */
+#define CPL_TX_SEC_PDU_CIPHERSTART_S    5
+#define CPL_TX_SEC_PDU_CIPHERSTART_M    0x3ff
+#define CPL_TX_SEC_PDU_CIPHERSTART_V(x) ((x) << CPL_TX_SEC_PDU_CIPHERSTART_S)
+#define CPL_TX_SEC_PDU_CIPHERSTART_G(x) \
+       (((x) >> CPL_TX_SEC_PDU_CIPHERSTART_S) & \
+        CPL_TX_SEC_PDU_CIPHERSTART_M)
+
+/* CipherStopOffset: offset in bytes for encryption/decryption end
+ * from end of the payload of this command (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_S      0
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_M      0x1f
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_V(x)   \
+       ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_HI_S)
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) & \
+        CPL_TX_SEC_PDU_CIPHERSTOP_HI_M)
+
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_S      28
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_M      0xf
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_V(x)   \
+       ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_LO_S)
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) & \
+        CPL_TX_SEC_PDU_CIPHERSTOP_LO_M)
+
+/* AuthStartOffset: offset in bytes for authentication start from
+ * the first byte following the pkt headers (0-1023)
+ */
+#define CPL_TX_SEC_PDU_AUTHSTART_S  18
+#define CPL_TX_SEC_PDU_AUTHSTART_M  0x3ff
+#define CPL_TX_SEC_PDU_AUTHSTART_V(x)   ((x) << CPL_TX_SEC_PDU_AUTHSTART_S)
+#define CPL_TX_SEC_PDU_AUTHSTART_G(x)   \
+       (((x) >> CPL_TX_SEC_PDU_AUTHSTART_S) & \
+        CPL_TX_SEC_PDU_AUTHSTART_M)
+
+/* AuthStopOffset: offset in bytes for authentication
+ * end from end of the payload of this command (0-511 Bytes)
+ */
+#define CPL_TX_SEC_PDU_AUTHSTOP_S   9
+#define CPL_TX_SEC_PDU_AUTHSTOP_M   0x1ff
+#define CPL_TX_SEC_PDU_AUTHSTOP_V(x)    ((x) << CPL_TX_SEC_PDU_AUTHSTOP_S)
+#define CPL_TX_SEC_PDU_AUTHSTOP_G(x)    \
+       (((x) >> CPL_TX_SEC_PDU_AUTHSTOP_S) & \
+        CPL_TX_SEC_PDU_AUTHSTOP_M)
+
+/* AuthInsrtOffset: offset in bytes for authentication insertion
+ * from end of the payload of this command (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_AUTHINSERT_S 0
+#define CPL_TX_SEC_PDU_AUTHINSERT_M 0x1ff
+#define CPL_TX_SEC_PDU_AUTHINSERT_V(x)  ((x) << CPL_TX_SEC_PDU_AUTHINSERT_S)
+#define CPL_TX_SEC_PDU_AUTHINSERT_G(x)  \
+       (((x) >> CPL_TX_SEC_PDU_AUTHINSERT_S) & \
+        CPL_TX_SEC_PDU_AUTHINSERT_M)
+
+struct cpl_rx_phys_dsgl {
+       __be32 op_to_tid;
+       __be32 pcirlxorder_to_noofsgentr;
+       struct rss_header rss_hdr_int;
+};
+
+#define CPL_RX_PHYS_DSGL_OPCODE_S       24
+#define CPL_RX_PHYS_DSGL_OPCODE_M       0xff
+#define CPL_RX_PHYS_DSGL_OPCODE_V(x)    ((x) << CPL_RX_PHYS_DSGL_OPCODE_S)
+#define CPL_RX_PHYS_DSGL_OPCODE_G(x)    \
+       (((x) >> CPL_RX_PHYS_DSGL_OPCODE_S) & CPL_RX_PHYS_DSGL_OPCODE_M)
+
+#define CPL_RX_PHYS_DSGL_ISRDMA_S       23
+#define CPL_RX_PHYS_DSGL_ISRDMA_M       0x1
+#define CPL_RX_PHYS_DSGL_ISRDMA_V(x)    ((x) << CPL_RX_PHYS_DSGL_ISRDMA_S)
+#define CPL_RX_PHYS_DSGL_ISRDMA_G(x)    \
+       (((x) >> CPL_RX_PHYS_DSGL_ISRDMA_S) & CPL_RX_PHYS_DSGL_ISRDMA_M)
+#define CPL_RX_PHYS_DSGL_ISRDMA_F       CPL_RX_PHYS_DSGL_ISRDMA_V(1U)
+
+#define CPL_RX_PHYS_DSGL_RSVD1_S        20
+#define CPL_RX_PHYS_DSGL_RSVD1_M        0x7
+#define CPL_RX_PHYS_DSGL_RSVD1_V(x)     ((x) << CPL_RX_PHYS_DSGL_RSVD1_S)
+#define CPL_RX_PHYS_DSGL_RSVD1_G(x)     \
+       (((x) >> CPL_RX_PHYS_DSGL_RSVD1_S) & \
+        CPL_RX_PHYS_DSGL_RSVD1_M)
+
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_S          31
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_M          0x1
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_V(x)       \
+       ((x) << CPL_RX_PHYS_DSGL_PCIRLXORDER_S)
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_G(x)       \
+       (((x) >> CPL_RX_PHYS_DSGL_PCIRLXORDER_S) & \
+        CPL_RX_PHYS_DSGL_PCIRLXORDER_M)
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_F  CPL_RX_PHYS_DSGL_PCIRLXORDER_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_S           30
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_M           0x1
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_V(x)        \
+       ((x) << CPL_RX_PHYS_DSGL_PCINOSNOOP_S)
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_G(x)        \
+       (((x) >> CPL_RX_PHYS_DSGL_PCINOSNOOP_S) & \
+        CPL_RX_PHYS_DSGL_PCINOSNOOP_M)
+
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_F   CPL_RX_PHYS_DSGL_PCINOSNOOP_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_S          29
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_M          0x1
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_V(x)       \
+       ((x) << CPL_RX_PHYS_DSGL_PCITPHNTENB_S)
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_G(x)       \
+       (((x) >> CPL_RX_PHYS_DSGL_PCITPHNTENB_S) & \
+        CPL_RX_PHYS_DSGL_PCITPHNTENB_M)
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_F  CPL_RX_PHYS_DSGL_PCITPHNTENB_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCITPHNT_S     27
+#define CPL_RX_PHYS_DSGL_PCITPHNT_M     0x3
+#define CPL_RX_PHYS_DSGL_PCITPHNT_V(x)  ((x) << CPL_RX_PHYS_DSGL_PCITPHNT_S)
+#define CPL_RX_PHYS_DSGL_PCITPHNT_G(x)  \
+       (((x) >> CPL_RX_PHYS_DSGL_PCITPHNT_S) & \
+        CPL_RX_PHYS_DSGL_PCITPHNT_M)
+
+#define CPL_RX_PHYS_DSGL_DCAID_S        16
+#define CPL_RX_PHYS_DSGL_DCAID_M        0x7ff
+#define CPL_RX_PHYS_DSGL_DCAID_V(x)     ((x) << CPL_RX_PHYS_DSGL_DCAID_S)
+#define CPL_RX_PHYS_DSGL_DCAID_G(x)     \
+       (((x) >> CPL_RX_PHYS_DSGL_DCAID_S) & \
+        CPL_RX_PHYS_DSGL_DCAID_M)
+
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_S           0
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_M           0xffff
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_V(x)        \
+       ((x) << CPL_RX_PHYS_DSGL_NOOFSGENTR_S)
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_G(x)        \
+       (((x) >> CPL_RX_PHYS_DSGL_NOOFSGENTR_S) & \
+        CPL_RX_PHYS_DSGL_NOOFSGENTR_M)
+
 #endif  /* __T4_MSG_H */
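
To show how the SCMD_* fields above compose: the seqno_numivs word of
struct cpl_tx_sec_pdu packs the sequence and cipher controls into one 32-bit
value. A hedged sketch for a TLS 1.2 AES-GCM encrypt request -- every field
value here is illustrative, chosen from the encodings documented in the
comments above:

	__be32 seqno_numivs;

	seqno_numivs = cpu_to_be32(SCMD_SEQ_NO_CTRL_V(0) |
				   SCMD_PROTO_VERSION_V(0) |	/* TLS 1.2 */
				   SCMD_ENC_DEC_CTRL_V(0) |	/* encrypt */
				   SCMD_CIPH_MODE_V(2) |	/* AES-GCM */
				   SCMD_AUTH_MODE_V(0) |	/* GCM supplies its own tag */
				   SCMD_IV_SIZE_V(4) |		/* 8-byte IV, in 2-byte units */
				   SCMD_NUM_IVS_V(1));
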
index a89b307..ffe4bf4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -102,6 +102,7 @@ enum fw_wr_opcodes {
        FW_RI_FR_NSMR_WR               = 0x19,
        FW_RI_INV_LSTAG_WR             = 0x1a,
        FW_ISCSI_TX_DATA_WR            = 0x45,
+       FW_CRYPTO_LOOKASIDE_WR         = 0x6d,
        FW_LASTC2E_WR                  = 0x70
 };
 
@@ -680,6 +681,7 @@ enum fw_cmd_opcodes {
        FW_RSS_IND_TBL_CMD             = 0x20,
        FW_RSS_GLB_CONFIG_CMD          = 0x22,
        FW_RSS_VI_CONFIG_CMD           = 0x23,
+       FW_SCHED_CMD                   = 0x24,
        FW_DEVLOG_CMD                  = 0x25,
        FW_CLIP_CMD                    = 0x28,
        FW_LASTC2E_CMD                 = 0x40,
@@ -1060,7 +1062,7 @@ struct fw_caps_config_cmd {
        __be16 niccaps;
        __be16 ofldcaps;
        __be16 rdmacaps;
-       __be16 r4;
+       __be16 cryptocaps;
        __be16 iscsicaps;
        __be16 fcoecaps;
        __be32 cfcsum;
@@ -2961,6 +2963,41 @@ struct fw_rss_vi_config_cmd {
 #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x)        ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S)
 #define FW_RSS_VI_CONFIG_CMD_UDPEN_F   FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U)
 
+enum fw_sched_sc {
+       FW_SCHED_SC_PARAMS              = 1,
+};
+
+struct fw_sched_cmd {
+       __be32 op_to_write;
+       __be32 retval_len16;
+       union fw_sched {
+               struct fw_sched_config {
+                       __u8   sc;
+                       __u8   type;
+                       __u8   minmaxen;
+                       __u8   r3[5];
+                       __u8   nclasses[4];
+                       __be32 r4;
+               } config;
+               struct fw_sched_params {
+                       __u8   sc;
+                       __u8   type;
+                       __u8   level;
+                       __u8   mode;
+                       __u8   unit;
+                       __u8   rate;
+                       __u8   ch;
+                       __u8   cl;
+                       __be32 min;
+                       __be32 max;
+                       __be16 weight;
+                       __be16 pktsize;
+                       __be16 burstsize;
+                       __be16 r4;
+               } params;
+       } u;
+};
+
 struct fw_clip_cmd {
        __be32 op_to_write;
        __be32 alloc_to_len16;
@@ -3249,4 +3286,127 @@ struct fw_devlog_cmd {
 #define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \
        (((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M)
 
+#define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr))
+
+struct fw_crypto_lookaside_wr {
+       __be32 op_to_cctx_size;
+       __be32 len16_pkd;
+       __be32 session_id;
+       __be32 rx_chid_to_rx_q_id;
+       __be32 key_addr;
+       __be32 pld_size_hash_size;
+       __be64 cookie;
+};
+
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_OPCODE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_COMPL_M)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_LEN16_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_S  27
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_M  0x3
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_PHASH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IV_S   23
+#define FW_CRYPTO_LOOKASIDE_WR_IV_M   0x3
+#define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_TX_CH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \
+       ((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \
+       (((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \
+        FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M)
+
 #endif /* _T4FW_INTERFACE_H_ */
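
Likewise, the FW_CRYPTO_LOOKASIDE_WR_* fields pack the first two words of
struct fw_crypto_lookaside_wr. A hedged sketch -- the immediate length,
context location and context size below are purely illustrative, not values
mandated by the firmware interface:

	struct fw_crypto_lookaside_wr wr;

	memset(&wr, 0, sizeof(wr));
	wr.op_to_cctx_size =
		cpu_to_be32(FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(FW_CRYPTO_LOOKASIDE_WR) |
			    FW_CRYPTO_LOOKASIDE_WR_COMPL_F |
			    FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(32) |
			    FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) |
			    FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(2));
	wr.len16_pkd = cpu_to_be32(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(sizeof(wr) / 16));
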
index f2951bf..100b2cc 100644 (file)
@@ -2378,7 +2378,7 @@ static void size_nports_qsets(struct adapter *adapter)
         */
        pmask_nports = hweight32(adapter->params.vfres.pmask);
        if (pmask_nports < adapter->params.nports) {
-               dev_warn(adapter->pdev_dev, "only using %d of %d provissioned"
+               dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
                         " virtual interfaces; limited by Port Access Rights"
                         " mask %#x\n", pmask_nports, adapter->params.nports,
                         adapter->params.vfres.pmask);
index c8fd4f8..f3ed9ce 100644 (file)
@@ -1648,14 +1648,15 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
 
        if (csum_ok && !pkt->err_vec &&
            (be32_to_cpu(pkt->l2info) & (RXF_UDP_F | RXF_TCP_F))) {
-               if (!pkt->ip_frag)
+               if (!pkt->ip_frag) {
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
-               else {
+                       rxq->stats.rx_cso++;
+               } else if (pkt->l2info & htonl(RXF_IP_F)) {
                        __sum16 c = (__force __sum16)pkt->csum;
                        skb->csum = csum_unfold(c);
                        skb->ip_summed = CHECKSUM_COMPLETE;
+                       rxq->stats.rx_cso++;
                }
-               rxq->stats.rx_cso++;
        } else
                skb_checksum_none_assert(skb);
 
index 2362230..2534e30 100644 (file)
@@ -1,3 +1,5 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+
 obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o
 
-libcxgb-y := libcxgb_ppm.o
+libcxgb-y := libcxgb_ppm.o libcxgb_cm.o
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
new file mode 100644 (file)
index 0000000..0f0de5b
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+
+#include "libcxgb_cm.h"
+
+void
+cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
+               int *iptype, __u8 *local_ip, __u8 *peer_ip,
+               __be16 *local_port, __be16 *peer_port)
+{
+       int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+                     ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+                     T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+       int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+                    IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+                    T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+       struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
+       struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
+       struct tcphdr *tcp = (struct tcphdr *)
+                            ((u8 *)(req + 1) + eth_len + ip_len);
+
+       if (ip->version == 4) {
+               pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n",
+                        __func__, ntohl(ip->saddr), ntohl(ip->daddr),
+                        ntohs(tcp->source), ntohs(tcp->dest));
+               *iptype = 4;
+               memcpy(peer_ip, &ip->saddr, 4);
+               memcpy(local_ip, &ip->daddr, 4);
+       } else {
+               pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n",
+                        __func__, ip6->saddr.s6_addr, ip6->daddr.s6_addr,
+                        ntohs(tcp->source), ntohs(tcp->dest));
+               *iptype = 6;
+               memcpy(peer_ip, ip6->saddr.s6_addr, 16);
+               memcpy(local_ip, ip6->daddr.s6_addr, 16);
+       }
+       *peer_port = tcp->source;
+       *local_port = tcp->dest;
+}
+EXPORT_SYMBOL(cxgb_get_4tuple);
+
+static bool
+cxgb_our_interface(struct cxgb4_lld_info *lldi,
+                  struct net_device *(*get_real_dev)(struct net_device *),
+                  struct net_device *egress_dev)
+{
+       int i;
+
+       egress_dev = get_real_dev(egress_dev);
+       for (i = 0; i < lldi->nports; i++)
+               if (lldi->ports[i] == egress_dev)
+                       return true;
+       return false;
+}
+
+struct dst_entry *
+cxgb_find_route(struct cxgb4_lld_info *lldi,
+               struct net_device *(*get_real_dev)(struct net_device *),
+               __be32 local_ip, __be32 peer_ip, __be16 local_port,
+               __be16 peer_port, u8 tos)
+{
+       struct rtable *rt;
+       struct flowi4 fl4;
+       struct neighbour *n;
+
+       rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
+                                  peer_port, local_port, IPPROTO_TCP,
+                                  tos, 0);
+       if (IS_ERR(rt))
+               return NULL;
+       n = dst_neigh_lookup(&rt->dst, &peer_ip);
+       if (!n) {
+               dst_release(&rt->dst);
+               return NULL;
+       }
+       if (!cxgb_our_interface(lldi, get_real_dev, n->dev) &&
+           !(n->dev->flags & IFF_LOOPBACK)) {
+               neigh_release(n);
+               dst_release(&rt->dst);
+               return NULL;
+       }
+       neigh_release(n);
+       return &rt->dst;
+}
+EXPORT_SYMBOL(cxgb_find_route);
+
+struct dst_entry *
+cxgb_find_route6(struct cxgb4_lld_info *lldi,
+                struct net_device *(*get_real_dev)(struct net_device *),
+                __u8 *local_ip, __u8 *peer_ip, __be16 local_port,
+                __be16 peer_port, u8 tos, __u32 sin6_scope_id)
+{
+       struct dst_entry *dst = NULL;
+
+       if (IS_ENABLED(CONFIG_IPV6)) {
+               struct flowi6 fl6;
+
+               memset(&fl6, 0, sizeof(fl6));
+               memcpy(&fl6.daddr, peer_ip, 16);
+               memcpy(&fl6.saddr, local_ip, 16);
+               if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+                       fl6.flowi6_oif = sin6_scope_id;
+               dst = ip6_route_output(&init_net, NULL, &fl6);
+               if (dst->error) {
+                       dst_release(dst);
+                       dst = NULL;
+                       goto out;
+               }
+               if (!cxgb_our_interface(lldi, get_real_dev,
+                                       ip6_dst_idev(dst)->dev) &&
+                   !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+                       dst_release(dst);
+                       dst = NULL;
+               }
+       }
+
+out:
+       return dst;
+}
+EXPORT_SYMBOL(cxgb_find_route6);
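
The expected call pattern for these helpers, as the iw_cxgb4 and cxgbit users
would invoke them on an incoming connection request. This is a hedged sketch:
get_real_dev is supplied by the caller, CHELSIO_T5 is chosen purely for
illustration, and a tos of 0 is assumed:

	static struct dst_entry *
	example_route_v4(struct cxgb4_lld_info *lldi,
			 struct cpl_pass_accept_req *req,
			 struct net_device *(*get_real_dev)(struct net_device *))
	{
		__u8 local_ip[16], peer_ip[16];
		__be16 local_port, peer_port;
		int iptype;

		cxgb_get_4tuple(req, CHELSIO_T5, &iptype, local_ip, peer_ip,
				&local_port, &peer_port);
		if (iptype != 4)
			return NULL;	/* IPv6 goes through cxgb_find_route6() */

		return cxgb_find_route(lldi, get_real_dev,
				       *(__be32 *)local_ip, *(__be32 *)peer_ip,
				       local_port, peer_port, 0 /* tos */);
	}
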
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
new file mode 100644 (file)
index 0000000..515b94f
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIBCXGB_CM_H__
+#define __LIBCXGB_CM_H__
+
+#include <net/tcp.h>
+
+#include <cxgb4.h>
+#include <t4_msg.h>
+#include <l2t.h>
+
+void
+cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type,
+               int *, __u8 *, __u8 *, __be16 *, __be16 *);
+struct dst_entry *
+cxgb_find_route(struct cxgb4_lld_info *,
+               struct net_device *(*)(struct net_device *),
+               __be32, __be32, __be16, __be16, u8);
+struct dst_entry *
+cxgb_find_route6(struct cxgb4_lld_info *,
+                struct net_device *(*)(struct net_device *),
+                __u8 *, __u8 *, __be16, __be16, u8, __u32);
+
+/* Returns whether a CPL status conveys negative advice. */
+static inline bool cxgb_is_neg_adv(unsigned int status)
+{
+       return status == CPL_ERR_RTX_NEG_ADVICE ||
+              status == CPL_ERR_PERSIST_NEG_ADVICE ||
+              status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
+static inline void
+cxgb_best_mtu(const unsigned short *mtus, unsigned short mtu,
+             unsigned int *idx, int use_ts, int ipv6)
+{
+       unsigned short hdr_size = (ipv6 ?
+                                  sizeof(struct ipv6hdr) :
+                                  sizeof(struct iphdr)) +
+                                 sizeof(struct tcphdr) +
+                                 (use_ts ?
+                                  round_up(TCPOLEN_TIMESTAMP, 4) : 0);
+       unsigned short data_size = mtu - hdr_size;
+
+       cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
+static inline u32 cxgb_compute_wscale(u32 win)
+{
+       u32 wscale = 0;
+
+       while (wscale < 14 && (65535 << wscale) < win)
+               wscale++;
+       return wscale;
+}
+
+static inline void
+cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
+{
+       struct cpl_tid_release *req;
+
+       req = (struct cpl_tid_release *)__skb_put(skb, len);
+       memset(req, 0, len);
+
+       INIT_TP_WR(req, tid);
+       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+       set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
+}
+
+static inline void
+cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+                     void *handle, arp_err_handler_t handler)
+{
+       struct cpl_close_con_req *req;
+
+       req = (struct cpl_close_con_req *)__skb_put(skb, len);
+       memset(req, 0, len);
+
+       INIT_TP_WR(req, tid);
+       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+       set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+       t4_set_arp_err_handler(skb, handle, handler);
+}
+
+static inline void
+cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+                 void *handle, arp_err_handler_t handler)
+{
+       struct cpl_abort_req *req;
+
+       req = (struct cpl_abort_req *)__skb_put(skb, len);
+       memset(req, 0, len);
+
+       INIT_TP_WR(req, tid);
+       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+       req->cmd = CPL_ABORT_SEND_RST;
+       set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+       t4_set_arp_err_handler(skb, handle, handler);
+}
+
+static inline void
+cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
+{
+       struct cpl_abort_rpl *rpl;
+
+       rpl = (struct cpl_abort_rpl *)__skb_put(skb, len);
+       memset(rpl, 0, len);
+
+       INIT_TP_WR(rpl, tid);
+       OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+       rpl->cmd = CPL_ABORT_NO_RST;
+       set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+}
+
+static inline void
+cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+                   u32 credit_dack)
+{
+       struct cpl_rx_data_ack *req;
+
+       req = (struct cpl_rx_data_ack *)__skb_put(skb, len);
+       memset(req, 0, len);
+
+       INIT_TP_WR(req, tid);
+       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
+       req->credit_dack = cpu_to_be32(credit_dack);
+       set_wr_txq(skb, CPL_PRIORITY_ACK, chan);
+}
+#endif /* __LIBCXGB_CM_H__ */
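
Two brief usage notes on the inline helpers above, hedged as sketches.
cxgb_compute_wscale() steps the scale up until 65535 << wscale covers the
window: a 256 KB window yields wscale 3, because 65535 << 2 = 262140 still
falls just short of 262144. The cxgb_mk_*() builders all follow one pattern;
releasing a TID, for example (the skb, tid and tx_chan are assumed to come
from the caller, and cxgb4_ofld_send() is the existing offload-send export):

	u32 wscale = cxgb_compute_wscale(256 * 1024);	/* wscale == 3 */

	cxgb_mk_tid_release(skb, sizeof(struct cpl_tid_release), tid, tx_chan);
	cxgb4_ofld_send(dev, skb);	/* dev: the adapter's net_device */
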
index 58c6338..79d8009 100644 (file)
@@ -867,7 +867,7 @@ static int netdev_open(struct net_device *dev)
 
        /* Initialize other registers. */
        __set_mac_addr(dev);
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
        iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize);
 #else
        iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize);
index 86780b5..6cfa63a 100644 (file)
@@ -37,7 +37,7 @@
 #include "be_hw.h"
 #include "be_roce.h"
 
-#define DRV_VER                        "11.0.0.0"
+#define DRV_VER                        "11.1.0.0"
 #define DRV_NAME               "be2net"
 #define BE_NAME                        "Emulex BladeEngine2"
 #define BE3_NAME               "Emulex BladeEngine3"
@@ -399,13 +399,13 @@ enum vf_state {
 #define BE_FLAGS_PHY_MISCONFIGURED             BIT(10)
 #define BE_FLAGS_ERR_DETECTION_SCHEDULED       BIT(11)
 #define BE_FLAGS_OS2BMC                                BIT(12)
+#define BE_FLAGS_TRY_RECOVERY                  BIT(13)
 
 #define BE_UC_PMAC_COUNT                       30
 #define BE_VF_UC_PMAC_COUNT                    2
 
 #define MAX_ERR_RECOVERY_RETRY_COUNT           3
 #define ERR_DETECTION_DELAY                    1000
-#define ERR_RECOVERY_RETRY_DELAY               30000
 
 /* Ethtool set_dump flags */
 #define LANCER_INITIATE_FW_DUMP                        0x1
@@ -512,6 +512,66 @@ struct be_eth_addr {
        unsigned char mac[ETH_ALEN];
 };
 
+#define BE_SEC 1000                    /* in msec */
+#define BE_MIN (60 * BE_SEC)           /* in msec */
+#define BE_HOUR        (60 * BE_MIN)           /* in msec */
+
+#define ERR_RECOVERY_MAX_RETRY_COUNT           3
+#define ERR_RECOVERY_DETECTION_DELAY           BE_SEC
+#define ERR_RECOVERY_RETRY_DELAY               (30 * BE_SEC)
+
+/* UE-detection-duration in BEx/Skyhawk:
+ * All PFs must wait for this duration after they detect UE before reading
+ * SLIPORT_SEMAPHORE register. At the end of this duration, the Firmware
+ * guarantees that the SLIPORT_SEMAPHORE register is updated to indicate
+ * if the UE is recoverable.
+ */
+#define ERR_RECOVERY_UE_DETECT_DURATION                        BE_SEC
+
+/* Initial idle time (in msec) to elapse after driver load,
+ * before UE recovery is allowed.
+ */
+#define ERR_IDLE_HR                    24
+#define ERR_RECOVERY_IDLE_TIME         (ERR_IDLE_HR * BE_HOUR)
+
+/* Time interval (in msec) after which UE recovery can be repeated */
+#define ERR_INTERVAL_HR                        72
+#define ERR_RECOVERY_INTERVAL          (ERR_INTERVAL_HR * BE_HOUR)
+
+/* BEx/SH UE recovery state machine */
+enum {
+       ERR_RECOVERY_ST_NONE = 0,               /* No Recovery */
+       ERR_RECOVERY_ST_DETECT = 1,             /* UE detection duration */
+       ERR_RECOVERY_ST_RESET = 2,              /* Reset Phase (PF0 only) */
+       ERR_RECOVERY_ST_PRE_POLL = 3,           /* Pre-Poll Phase (all PFs) */
+       ERR_RECOVERY_ST_REINIT = 4              /* Re-initialize Phase */
+};
+
+struct be_error_recovery {
+       /* Lancer error recovery variables */
+       u8 recovery_retries;
+
+       /* BEx/Skyhawk error recovery variables */
+       u8 recovery_state;
+       u16 ue_to_reset_time;           /* Time after UE, to soft reset
+                                        * the chip - PF0 only
+                                        */
+       u16 ue_to_poll_time;            /* Time after UE, to Restart Polling
+                                        * of SLIPORT_SEMAPHORE reg
+                                        */
+       u16 last_err_code;
+       bool recovery_supported;
+       unsigned long probe_time;
+       unsigned long last_recovery_time;
+
+       /* Common to both Lancer & BEx/SH error recovery */
+       u32 resched_delay;
+       struct delayed_work err_detection_work;
+};
+
+/* Ethtool priv_flags */
+#define        BE_DISABLE_TPE_RECOVERY 0x1
+
 struct be_adapter {
        struct pci_dev *pdev;
        struct net_device *netdev;
@@ -560,7 +620,6 @@ struct be_adapter {
        struct delayed_work work;
        u16 work_counter;
 
-       struct delayed_work be_err_detection_work;
        u8 recovery_retries;
        u8 err_flags;
        bool pcicfg_mapped;     /* pcicfg obtained via pci_iomap() */
@@ -634,6 +693,9 @@ struct be_adapter {
        u32 fat_dump_len;
        u16 serial_num[CNTL_SERIAL_NUM_WORDS];
        u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */
+       u8 dev_mac[ETH_ALEN];
+       u32 priv_flags; /* ethtool get/set_priv_flags() */
+       struct be_error_recovery error_recovery;
 };
 
 /* Used for defered FW config cmds. Add fields to this struct as reqd */
@@ -867,6 +929,9 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb)
        return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
 }
 
+#define be_error_recovering(adapter)   \
+               (adapter->flags & BE_FLAGS_TRY_RECOVERY)
+
 #define BE_ERROR_EEH           1
 #define BE_ERROR_UE            BIT(1)
 #define BE_ERROR_FW            BIT(2)
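
The be_error_recovery bookkeeping above is intended to be driven from the
delayed work item it embeds. A hedged sketch of the rescheduling step -- the
function name is illustrative; the fields, flags and delay macros are the
ones defined in this patch:

	static void example_resched_err_detection(struct be_adapter *adapter)
	{
		struct be_error_recovery *err_rec = &adapter->error_recovery;

		err_rec->resched_delay = be_error_recovering(adapter) ?
					 ERR_RECOVERY_RETRY_DELAY :
					 ERR_RECOVERY_DETECTION_DELAY;
		schedule_delayed_work(&err_rec->err_detection_work,
				      msecs_to_jiffies(err_rec->resched_delay));
	}
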
index fa11a5a..15d02da 100644 (file)
@@ -92,6 +92,11 @@ static struct be_cmd_priv_map cmd_priv_map[] = {
                CMD_SUBSYSTEM_COMMON,
                BE_PRIV_DEVCFG | BE_PRIV_VHADM
        },
+       {
+               OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
+               CMD_SUBSYSTEM_COMMON,
+               BE_PRIV_DEVCFG
+       }
 };
 
 static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem)
@@ -705,7 +710,7 @@ static int be_mbox_notify_wait(struct be_adapter *adapter)
        return 0;
 }
 
-static u16 be_POST_stage_get(struct be_adapter *adapter)
+u16 be_POST_stage_get(struct be_adapter *adapter)
 {
        u32 sem;
 
@@ -4127,6 +4132,10 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter,
        struct be_cmd_req_get_ext_fat_caps *req;
        int status;
 
+       if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
+                           CMD_SUBSYSTEM_COMMON))
+               return -EPERM;
+
        if (mutex_lock_interruptible(&adapter->mbox_lock))
                return -1;
 
@@ -4138,7 +4147,7 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter,
 
        req = cmd->va;
        be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-                              OPCODE_COMMON_GET_EXT_FAT_CAPABILITES,
+                              OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
                               cmd->size, wrb, cmd);
        req->parameter_type = cpu_to_le32(1);
 
@@ -4167,7 +4176,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter,
        req = cmd->va;
        memcpy(&req->set_params, configs, sizeof(struct be_fat_conf_params));
        be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-                              OPCODE_COMMON_SET_EXT_FAT_CAPABILITES,
+                              OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES,
                               cmd->size, wrb, cmd);
 
        status = be_mcc_notify_wait(adapter);
@@ -4954,6 +4963,57 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter,
                                                          1, domain);
        return status;
 }
+
+int be_cmd_set_features(struct be_adapter *adapter)
+{
+       struct be_cmd_resp_set_features *resp;
+       struct be_cmd_req_set_features *req;
+       struct be_mcc_wrb *wrb;
+       int status;
+
+       if (mutex_lock_interruptible(&adapter->mcc_lock))
+               return -1;
+
+       wrb = wrb_from_mccq(adapter);
+       if (!wrb) {
+               status = -EBUSY;
+               goto err;
+       }
+
+       req = embedded_payload(wrb);
+
+       be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
+                              OPCODE_COMMON_SET_FEATURES,
+                              sizeof(*req), wrb, NULL);
+
+       req->features = cpu_to_le32(BE_FEATURE_UE_RECOVERY);
+       req->parameter_len = cpu_to_le32(sizeof(struct be_req_ue_recovery));
+       req->parameter.req.uer = cpu_to_le32(BE_UE_RECOVERY_UER_MASK);
+
+       status = be_mcc_notify_wait(adapter);
+       if (status)
+               goto err;
+
+       resp = embedded_payload(wrb);
+
+       adapter->error_recovery.ue_to_poll_time =
+               le16_to_cpu(resp->parameter.resp.ue2rp);
+       adapter->error_recovery.ue_to_reset_time =
+               le16_to_cpu(resp->parameter.resp.ue2sr);
+       adapter->error_recovery.recovery_supported = true;
+err:
+       /* Also check for MCC_STATUS_INVALID_LENGTH on Skyhawk (SKH):
+        * older firmware versions return this error for this command
+        */
+       if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST ||
+           base_status(status) == MCC_STATUS_INVALID_LENGTH)
+               dev_info(&adapter->pdev->dev,
+                        "Adapter does not support HW error recovery\n");
+
+       mutex_unlock(&adapter->mcc_lock);
+       return status;
+}
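
be_cmd_set_features() above follows the driver's usual embedded-MCC-command
pattern. Stripped to a skeleton (a sketch distilled from the function itself,
error handling removed), the flow is:

	mutex_lock(&adapter->mcc_lock);           /* serialize MCC users */
	wrb = wrb_from_mccq(adapter);             /* claim a free WRB */
	req = embedded_payload(wrb);              /* request lives inside the WRB */
	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
			       OPCODE_COMMON_SET_FEATURES, sizeof(*req),
			       wrb, NULL);
	/* ... fill in the request ... */
	status = be_mcc_notify_wait(adapter);     /* post and wait for completion */
	resp = embedded_payload(wrb);             /* response reuses the same WRB */
	mutex_unlock(&adapter->mcc_lock);

On firmware that predates this command the completion carries ILLEGAL_REQUEST or
INVALID_LENGTH; recovery_supported then stays false and be_err_recover() will
refuse HW recovery on BEx/Skyhawk adapters.
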
+
 int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
                    int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
 {
index 0d6be22..1bd82bc 100644 (file)
@@ -58,7 +58,8 @@ enum mcc_base_status {
        MCC_STATUS_INSUFFICIENT_BUFFER = 4,
        MCC_STATUS_UNAUTHORIZED_REQUEST = 5,
        MCC_STATUS_NOT_SUPPORTED = 66,
-       MCC_STATUS_FEATURE_NOT_SUPPORTED = 68
+       MCC_STATUS_FEATURE_NOT_SUPPORTED = 68,
+       MCC_STATUS_INVALID_LENGTH = 116
 };
 
 /* Additional status */
@@ -294,8 +295,8 @@ struct be_mcc_mailbox {
 #define OPCODE_COMMON_GET_PHY_DETAILS                  102
 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP          103
 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES   121
-#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITES          125
-#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITES          126
+#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES         125
+#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES         126
 #define OPCODE_COMMON_GET_MAC_LIST                     147
 #define OPCODE_COMMON_SET_MAC_LIST                     148
 #define OPCODE_COMMON_GET_HSW_CONFIG                   152
@@ -308,6 +309,7 @@ struct be_mcc_mailbox {
 #define OPCODE_COMMON_READ_OBJECT                      171
 #define OPCODE_COMMON_WRITE_OBJECT                     172
 #define OPCODE_COMMON_DELETE_OBJECT                    174
+#define OPCODE_COMMON_SET_FEATURES                     191
 #define OPCODE_COMMON_MANAGE_IFACE_FILTERS             193
 #define OPCODE_COMMON_GET_IFACE_LIST                   194
 #define OPCODE_COMMON_ENABLE_DISABLE_VF                        196
@@ -2315,6 +2317,41 @@ struct be_cmd_resp_get_iface_list {
        struct be_if_desc if_desc;
 };
 
+/************** Set Features *******************/
+#define        BE_FEATURE_UE_RECOVERY          0x10
+#define        BE_UE_RECOVERY_UER_MASK         0x1
+
+struct be_req_ue_recovery {
+       u32     uer;
+       u32     rsvd;
+};
+
+struct be_cmd_req_set_features {
+       struct be_cmd_req_hdr hdr;
+       u32 features;
+       u32 parameter_len;
+       union {
+               struct be_req_ue_recovery req;
+               u32 rsvd[2];
+       } parameter;
+};
+
+struct be_resp_ue_recovery {
+       u32 uer;
+       u16 ue2rp;
+       u16 ue2sr;
+};
+
+struct be_cmd_resp_set_features {
+       struct be_cmd_resp_hdr hdr;
+       u32 features;
+       u32 parameter_len;
+       union {
+               struct be_resp_ue_recovery resp;
+               u32 rsvd[2];
+       } parameter;
+};
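
The request and response share an 8-byte parameter union that the firmware
interprets according to parameter_len. Two compile-time checks that would
document this contract, e.g. at the top of be_cmd_set_features() (an
illustrative assumption, not part of the patch):

	BUILD_BUG_ON(sizeof(struct be_req_ue_recovery) != 8);
	BUILD_BUG_ON(sizeof(struct be_resp_ue_recovery) != 8);
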
+
 /*************** Set logical link ********************/
 #define PLINK_ENABLE            BIT(0)
 #define PLINK_TRACK             BIT(8)
@@ -2343,6 +2380,7 @@ struct be_cmd_req_manage_iface_filters {
        u32 cap_control_flags;
 } __packed;
 
+u16 be_POST_stage_get(struct be_adapter *adapter);
 int be_pci_fnum_get(struct be_adapter *adapter);
 int be_fw_wait_ready(struct be_adapter *adapter);
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
@@ -2470,3 +2508,4 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op);
 int be_cmd_set_sriov_config(struct be_adapter *adapter,
                            struct be_resources res, u16 num_vfs,
                            struct be_resources *vft_res);
+int be_cmd_set_features(struct be_adapter *adapter);
index 50e7be5..0a48a31 100644 (file)
@@ -421,6 +421,10 @@ static void be_get_ethtool_stats(struct net_device *netdev,
        }
 }
 
+static const char be_priv_flags[][ETH_GSTRING_LEN] = {
+       "disable-tpe-recovery"
+};
+
 static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset,
                                uint8_t *data)
 {
@@ -454,6 +458,10 @@ static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset,
                        data += ETH_GSTRING_LEN;
                }
                break;
+       case ETH_SS_PRIV_FLAGS:
+               for (i = 0; i < ARRAY_SIZE(be_priv_flags); i++)
+                       strcpy(data + i * ETH_GSTRING_LEN, be_priv_flags[i]);
+               break;
        }
 }
 
@@ -468,6 +476,8 @@ static int be_get_sset_count(struct net_device *netdev, int stringset)
                return ETHTOOL_STATS_NUM +
                        adapter->num_rx_qs * ETHTOOL_RXSTATS_NUM +
                        adapter->num_tx_qs * ETHTOOL_TXSTATS_NUM;
+       case ETH_SS_PRIV_FLAGS:
+               return ARRAY_SIZE(be_priv_flags);
        default:
                return -EINVAL;
        }
@@ -1360,6 +1370,34 @@ err:
        return be_cmd_status(status);
 }
 
+static u32 be_get_priv_flags(struct net_device *netdev)
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+
+       return adapter->priv_flags;
+}
+
+static int be_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+       struct be_adapter *adapter = netdev_priv(netdev);
+       bool tpe_old = !!(adapter->priv_flags & BE_DISABLE_TPE_RECOVERY);
+       bool tpe_new = !!(flags & BE_DISABLE_TPE_RECOVERY);
+
+       if (tpe_old != tpe_new) {
+               if (tpe_new) {
+                       adapter->priv_flags |= BE_DISABLE_TPE_RECOVERY;
+                       dev_info(&adapter->pdev->dev,
+                                "HW error recovery is disabled\n");
+               } else {
+                       adapter->priv_flags &= ~BE_DISABLE_TPE_RECOVERY;
+                       dev_info(&adapter->pdev->dev,
+                                "HW error recovery is enabled\n");
+               }
+       }
+
+       return 0;
+}
+
 const struct ethtool_ops be_ethtool_ops = {
        .get_settings = be_get_settings,
        .get_drvinfo = be_get_drvinfo,
@@ -1373,6 +1411,8 @@ const struct ethtool_ops be_ethtool_ops = {
        .get_ringparam = be_get_ringparam,
        .get_pauseparam = be_get_pauseparam,
        .set_pauseparam = be_set_pauseparam,
+       .set_priv_flags = be_set_priv_flags,
+       .get_priv_flags = be_get_priv_flags,
        .get_strings = be_get_stat_strings,
        .set_phys_id = be_set_phys_id,
        .set_dump = be_set_dump,
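
The new private flag rides on the standard ethtool plumbing: be_get_sset_count()
reports one ETH_SS_PRIV_FLAGS entry, be_get_stat_strings() supplies its name,
and the two callbacks above mirror it into adapter->priv_flags. From user space
it should then show up via "ethtool --show-priv-flags ethX" and toggle with
"ethtool --set-priv-flags ethX disable-tpe-recovery on" (stock ethtool syntax;
the interface name is a placeholder).
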
index c684bb3..92942c8 100644 (file)
 #define MPU_EP_CONTROL                 0
 
 /********** MPU semaphore: used for SH & BE  *************/
+#define SLIPORT_SOFTRESET_OFFSET               0x5c    /* CSR BAR offset */
 #define SLIPORT_SEMAPHORE_OFFSET_BEx           0xac  /* CSR BAR offset */
 #define SLIPORT_SEMAPHORE_OFFSET_SH            0x94  /* PCI-CFG offset */
 #define POST_STAGE_MASK                                0x0000FFFF
 #define POST_ERR_MASK                          0x1
 #define POST_ERR_SHIFT                         31
+#define POST_ERR_RECOVERY_CODE_MASK            0xFFF
+
+/* Soft Reset register masks */
+#define SLIPORT_SOFTRESET_SR_MASK              0x00000080      /* SR bit */
 
 /* MPU semaphore POST stage values */
 #define POST_STAGE_AWAITING_HOST_RDY   0x1 /* FW awaiting go-ahead from host */
 #define POST_STAGE_HOST_RDY            0x2 /* Host has given go-ahead to FW */
 #define POST_STAGE_BE_RESET            0x3 /* Host wants to reset chip */
 #define POST_STAGE_ARMFW_RDY           0xc000  /* FW is done with POST */
-
+#define POST_STAGE_RECOVERABLE_ERR     0xE000  /* Recoverable err detected */
 
 /* Lancer SLIPORT registers */
 #define SLIPORT_STATUS_OFFSET          0x404
index f7584d4..34f63ef 100644 (file)
@@ -41,6 +41,11 @@ static ushort rx_frag_size = 2048;
 module_param(rx_frag_size, ushort, S_IRUGO);
 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds received data.");
 
+/* Per-module error detection/recovery workqueue shared across all functions.
+ * Each function schedules its own work request on this shared workqueue.
+ */
+struct workqueue_struct *be_err_recovery_workq;
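
A dedicated singlethread workqueue (created in be_init_module(), later in this
patch) keeps the timed recovery steps off the system workqueue and, being
single-threaded, serializes recovery work across functions, so only one function
at a time drives the chip-wide reset sequence. Scheduling then reduces to (taken
from be_schedule_err_detection() below):

	queue_delayed_work(be_err_recovery_workq,
			   &err_rec->err_detection_work,
			   msecs_to_jiffies(delay));
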
+
 static const struct pci_device_id be_dev_ids[] = {
        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
@@ -264,6 +269,38 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
        iowrite32(val, adapter->db + DB_CQ_OFFSET);
 }
 
+static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
+{
+       int i;
+
+       /* Check if the MAC has already been added as part of the uc-list */
+       for (i = 0; i < adapter->uc_macs; i++) {
+               if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
+                                    mac)) {
+                       /* mac already added, skip addition */
+                       adapter->pmac_id[0] = adapter->pmac_id[i + 1];
+                       return 0;
+               }
+       }
+
+       return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
+                              &adapter->pmac_id[0], 0);
+}
+
+static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
+{
+       int i;
+
+       /* Skip deletion if the programmed MAC is
+        * still in use in the uc-list
+        */
+       for (i = 0; i < adapter->uc_macs; i++) {
+               if (adapter->pmac_id[i + 1] == pmac_id)
+                       return;
+       }
+       be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
+}
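
These two helpers keep the MAC bookkeeping consistent when the primary device
MAC also appears in the unicast list. The invariant they maintain (an
illustrative summary, not patch text):

	/*
	 * pmac_id[0]     - FW handle for the primary device MAC (dev_mac)
	 * pmac_id[i + 1] - FW handle for uc_list[i]
	 *
	 * If uc_list[i] equals dev_mac, the two slots alias one FW handle:
	 * the add paths skip reprogramming it and the del paths skip
	 * releasing it while the other user remains.
	 */
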
+
 static int be_mac_addr_set(struct net_device *netdev, void *p)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
@@ -271,7 +308,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
        struct sockaddr *addr = p;
        int status;
        u8 mac[ETH_ALEN];
-       u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
+       u32 old_pmac_id = adapter->pmac_id[0];
 
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
@@ -279,7 +316,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
        /* Proceed further only if the user-provided MAC is different
         * from the active MAC
         */
-       if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+       if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
                return 0;
 
        /* if device is not running, copy MAC to netdev->dev_addr */
@@ -292,23 +329,22 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
         * FILTMGMT privilege. This failure is OK only if the PF programmed
         * the MAC for the VF.
         */
-       status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
-                                adapter->if_handle, &adapter->pmac_id[0], 0);
+       mutex_lock(&adapter->rx_filter_lock);
+       status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
        if (!status) {
-               curr_pmac_id = adapter->pmac_id[0];
 
                /* Delete the old programmed MAC. This call may fail if the
                 * old MAC was already deleted by the PF driver.
                 */
                if (adapter->pmac_id[0] != old_pmac_id)
-                       be_cmd_pmac_del(adapter, adapter->if_handle,
-                                       old_pmac_id, 0);
+                       be_dev_mac_del(adapter, old_pmac_id);
        }
 
+       mutex_unlock(&adapter->rx_filter_lock);
        /* Decide if the new MAC is successfully activated only after
         * querying the FW
         */
-       status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
+       status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
                                       adapter->if_handle, true, 0);
        if (status)
                goto err;
@@ -321,6 +357,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
                goto err;
        }
 done:
+       ether_addr_copy(adapter->dev_mac, addr->sa_data);
        ether_addr_copy(netdev->dev_addr, addr->sa_data);
        dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
        return 0;
@@ -1623,6 +1660,28 @@ static void be_clear_mc_list(struct be_adapter *adapter)
        adapter->mc_count = 0;
 }
 
+static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
+{
+       if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+                            adapter->dev_mac)) {
+               adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
+               return 0;
+       }
+
+       return be_cmd_pmac_add(adapter,
+                              (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+                              adapter->if_handle,
+                              &adapter->pmac_id[uc_idx + 1], 0);
+}
+
+static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
+{
+       if (pmac_id == adapter->pmac_id[0])
+               return;
+
+       be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
+}
+
 static void be_set_uc_list(struct be_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
@@ -1663,13 +1722,10 @@ static void be_set_uc_list(struct be_adapter *adapter)
                be_clear_uc_promisc(adapter);
 
                for (i = 0; i < adapter->uc_macs; i++)
-                       be_cmd_pmac_del(adapter, adapter->if_handle,
-                                       adapter->pmac_id[i + 1], 0);
+                       be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
 
                for (i = 0; i < curr_uc_macs; i++)
-                       be_cmd_pmac_add(adapter, adapter->uc_list[i].mac,
-                                       adapter->if_handle,
-                                       &adapter->pmac_id[i + 1], 0);
+                       be_uc_mac_add(adapter, i);
                adapter->uc_macs = curr_uc_macs;
                adapter->update_uc_list = false;
        }
@@ -1682,8 +1738,8 @@ static void be_clear_uc_list(struct be_adapter *adapter)
 
        __dev_uc_unsync(netdev, NULL);
        for (i = 0; i < adapter->uc_macs; i++)
-               be_cmd_pmac_del(adapter, adapter->if_handle,
-                               adapter->pmac_id[i + 1], 0);
+               be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
+
        adapter->uc_macs = 0;
 }
 
@@ -3358,9 +3414,7 @@ void be_detect_error(struct be_adapter *adapter)
                 */
 
                if (ue_lo || ue_hi) {
-                       dev_err(dev,
-                               "Unrecoverable Error detected in the adapter");
-                       dev_err(dev, "Please reboot server to recover");
+                       dev_err(dev, "Error detected in the adapter\n");
                        if (skyhawk_chip(adapter))
                                be_set_error(adapter, BE_ERROR_UE);
 
@@ -3563,9 +3617,7 @@ static void be_rx_qs_destroy(struct be_adapter *adapter)
 
 static void be_disable_if_filters(struct be_adapter *adapter)
 {
-       be_cmd_pmac_del(adapter, adapter->if_handle,
-                       adapter->pmac_id[0], 0);
-
+       be_dev_mac_del(adapter, adapter->pmac_id[0]);
        be_clear_uc_list(adapter);
        be_clear_mc_list(adapter);
 
@@ -3720,11 +3772,10 @@ static int be_enable_if_filters(struct be_adapter *adapter)
 
        /* For BE3 VFs, the PF programs the initial MAC address */
        if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
-               status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
-                                        adapter->if_handle,
-                                        &adapter->pmac_id[0], 0);
+               status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
                if (status)
                        return status;
+               ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
        }
 
        if (adapter->vlans_added)
@@ -3903,8 +3954,13 @@ static void be_cancel_worker(struct be_adapter *adapter)
 
 static void be_cancel_err_detection(struct be_adapter *adapter)
 {
+       struct be_error_recovery *err_rec = &adapter->error_recovery;
+
+       if (!be_err_recovery_workq)
+               return;
+
        if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
-               cancel_delayed_work_sync(&adapter->be_err_detection_work);
+               cancel_delayed_work_sync(&err_rec->err_detection_work);
                adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
        }
 }
@@ -4503,10 +4559,25 @@ static void be_schedule_worker(struct be_adapter *adapter)
        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
 }
 
+static void be_destroy_err_recovery_workq(void)
+{
+       if (!be_err_recovery_workq)
+               return;
+
+       flush_workqueue(be_err_recovery_workq);
+       destroy_workqueue(be_err_recovery_workq);
+       be_err_recovery_workq = NULL;
+}
+
 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
 {
-       schedule_delayed_work(&adapter->be_err_detection_work,
-                             msecs_to_jiffies(delay));
+       struct be_error_recovery *err_rec = &adapter->error_recovery;
+
+       if (!be_err_recovery_workq)
+               return;
+
+       queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
+                          msecs_to_jiffies(delay));
        adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
 }
 
@@ -4635,10 +4706,15 @@ static inline int fw_major_num(const char *fw_ver)
        return fw_major;
 }
 
-/* If any VFs are already enabled don't FLR the PF */
+/* During error recovery, always FLR the PF.
+ * Otherwise, don't FLR the PF if any VFs are already enabled.
+ */
 static bool be_reset_required(struct be_adapter *adapter)
 {
-       return pci_num_vf(adapter->pdev) ? false : true;
+       if (be_error_recovering(adapter))
+               return true;
+       else
+               return pci_num_vf(adapter->pdev) == 0;
 }
 
 /* Wait for the FW to be ready and perform the required initialization */
@@ -4650,6 +4726,9 @@ static int be_func_init(struct be_adapter *adapter)
        if (status)
                return status;
 
+       /* FW is now ready; clear errors to allow cmds/doorbell */
+       be_clear_error(adapter, BE_CLEAR_ALL);
+
        if (be_reset_required(adapter)) {
                status = be_cmd_reset_function(adapter);
                if (status)
@@ -4657,9 +4736,6 @@ static int be_func_init(struct be_adapter *adapter)
 
                /* Wait for interrupts to quiesce after an FLR */
                msleep(100);
-
-               /* We can clear all errors when function reset succeeds */
-               be_clear_error(adapter, BE_CLEAR_ALL);
        }
 
        /* Tell FW we're ready to fire cmds */
@@ -4767,6 +4843,9 @@ static int be_setup(struct be_adapter *adapter)
        if (!status && be_pause_supported(adapter))
                adapter->phy.fc_autoneg = 1;
 
+       if (be_physfn(adapter) && !lancer_chip(adapter))
+               be_cmd_set_features(adapter);
+
        be_schedule_worker(adapter);
        adapter->flags |= BE_FLAGS_SETUP_DONE;
        return 0;
@@ -5210,13 +5289,145 @@ static int be_resume(struct be_adapter *adapter)
        return 0;
 }
 
+static void be_soft_reset(struct be_adapter *adapter)
+{
+       u32 val;
+
+       dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
+       val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
+       val |= SLIPORT_SOFTRESET_SR_MASK;
+       iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
+}
+
+static bool be_err_is_recoverable(struct be_adapter *adapter)
+{
+       struct be_error_recovery *err_rec = &adapter->error_recovery;
+       unsigned long initial_idle_time =
+               msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
+       unsigned long recovery_interval =
+               msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
+       u16 ue_err_code;
+       u32 val;
+
+       val = be_POST_stage_get(adapter);
+       if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
+               return false;
+       ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
+       if (ue_err_code == 0)
+               return false;
+
+       dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
+               ue_err_code);
+
+       if (jiffies - err_rec->probe_time <= initial_idle_time) {
+               dev_err(&adapter->pdev->dev,
+                       "Cannot recover within %lu sec from driver load\n",
+                       jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
+               return false;
+       }
+
+       if (err_rec->last_recovery_time &&
+           (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
+               dev_err(&adapter->pdev->dev,
+                       "Cannot recover within %lu sec from last recovery\n",
+                       jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
+               return false;
+       }
+
+       if (ue_err_code == err_rec->last_err_code) {
+               dev_err(&adapter->pdev->dev,
+                       "Cannot recover from a consecutive TPE error\n");
+               return false;
+       }
+
+       err_rec->last_recovery_time = jiffies;
+       err_rec->last_err_code = ue_err_code;
+       return true;
+}
+
+static int be_tpe_recover(struct be_adapter *adapter)
+{
+       struct be_error_recovery *err_rec = &adapter->error_recovery;
+       int status = -EAGAIN;
+       u32 val;
+
+       switch (err_rec->recovery_state) {
+       case ERR_RECOVERY_ST_NONE:
+               err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
+               err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
+               break;
+
+       case ERR_RECOVERY_ST_DETECT:
+               val = be_POST_stage_get(adapter);
+               if ((val & POST_STAGE_RECOVERABLE_ERR) !=
+                   POST_STAGE_RECOVERABLE_ERR) {
+                       dev_err(&adapter->pdev->dev,
+                               "Unrecoverable HW error detected: 0x%x\n", val);
+                       status = -EINVAL;
+                       err_rec->resched_delay = 0;
+                       break;
+               }
+
+               dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
+
+               /* Only PF0 initiates a chip soft reset. But PF0 must wait UE2SR
+                * milliseconds before it checks the final error status in
+                * SLIPORT_SEMAPHORE to determine whether the recovery criteria
+                * are met. If they are, PF0 initiates the soft reset.
+                */
+               if (adapter->pf_num == 0) {
+                       err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
+                       err_rec->resched_delay = err_rec->ue_to_reset_time -
+                                       ERR_RECOVERY_UE_DETECT_DURATION;
+                       break;
+               }
+
+               err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
+               err_rec->resched_delay = err_rec->ue_to_poll_time -
+                                       ERR_RECOVERY_UE_DETECT_DURATION;
+               break;
+
+       case ERR_RECOVERY_ST_RESET:
+               if (!be_err_is_recoverable(adapter)) {
+                       dev_err(&adapter->pdev->dev,
+                               "Failed to meet recovery criteria\n");
+                       status = -EIO;
+                       err_rec->resched_delay = 0;
+                       break;
+               }
+               be_soft_reset(adapter);
+               err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
+               err_rec->resched_delay = err_rec->ue_to_poll_time -
+                                       err_rec->ue_to_reset_time;
+               break;
+
+       case ERR_RECOVERY_ST_PRE_POLL:
+               err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
+               err_rec->resched_delay = 0;
+               status = 0;                     /* done */
+               break;
+
+       default:
+               status = -EINVAL;
+               err_rec->resched_delay = 0;
+               break;
+       }
+
+       return status;
+}
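
Putting the delays together, each function's recovery work follows this timeline
(times measured from UE detection; ue_to_poll_time and ue_to_reset_time are the
UE2RP/UE2SR values returned by SET_FEATURES, with ue_to_poll_time expected to be
the larger of the two):

	t = 0                   DETECT: recoverable TPE code read from POST
	t = ue_to_reset_time    PF0 only: re-check the criteria, then soft reset
	t = ue_to_poll_time     all PFs leave PRE_POLL and re-initialize

be_err_is_recoverable() gates the reset: the POST stage must still report a
recoverable code, at least ERR_RECOVERY_IDLE_TIME must have elapsed since probe
and ERR_RECOVERY_INTERVAL since the last recovery, and the error code must
differ from the previous one (a repeated TPE code is treated as unrecoverable).
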
+
 static int be_err_recover(struct be_adapter *adapter)
 {
        int status;
 
-       /* Error recovery is supported only Lancer as of now */
-       if (!lancer_chip(adapter))
-               return -EIO;
+       if (!lancer_chip(adapter)) {
+               if (!adapter->error_recovery.recovery_supported ||
+                   adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
+                       return -EIO;
+               status = be_tpe_recover(adapter);
+               if (status)
+                       goto err;
+       }
 
        /* Wait for adapter to reach quiescent state before
         * destroying queues
@@ -5225,59 +5436,74 @@ static int be_err_recover(struct be_adapter *adapter)
        if (status)
                goto err;
 
+       adapter->flags |= BE_FLAGS_TRY_RECOVERY;
+
        be_cleanup(adapter);
 
        status = be_resume(adapter);
        if (status)
                goto err;
 
-       return 0;
+       adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
+
 err:
        return status;
 }
 
 static void be_err_detection_task(struct work_struct *work)
 {
+       struct be_error_recovery *err_rec =
+                       container_of(work, struct be_error_recovery,
+                                    err_detection_work.work);
        struct be_adapter *adapter =
-                               container_of(work, struct be_adapter,
-                                            be_err_detection_work.work);
+                       container_of(err_rec, struct be_adapter,
+                                    error_recovery);
+       u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
        struct device *dev = &adapter->pdev->dev;
        int recovery_status;
-       int delay = ERR_DETECTION_DELAY;
 
        be_detect_error(adapter);
-
-       if (be_check_error(adapter, BE_ERROR_HW))
-               recovery_status = be_err_recover(adapter);
-       else
+       if (!be_check_error(adapter, BE_ERROR_HW))
                goto reschedule_task;
 
+       recovery_status = be_err_recover(adapter);
        if (!recovery_status) {
-               adapter->recovery_retries = 0;
+               err_rec->recovery_retries = 0;
+               err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
                dev_info(dev, "Adapter recovery successful\n");
                goto reschedule_task;
-       } else if (be_virtfn(adapter)) {
+       } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
+               /* BEx/SH recovery state machine */
+               if (adapter->pf_num == 0 &&
+                   err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
+                       dev_err(&adapter->pdev->dev,
+                               "Adapter recovery in progress\n");
+               resched_delay = err_rec->resched_delay;
+               goto reschedule_task;
+       } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
                /* For VFs, check every second whether the PF has
                 * allocated resources.
                 */
                dev_err(dev, "Re-trying adapter recovery\n");
                goto reschedule_task;
-       } else if (adapter->recovery_retries++ <
-                  MAX_ERR_RECOVERY_RETRY_COUNT) {
+       } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
+                  ERR_RECOVERY_MAX_RETRY_COUNT) {
                /* If another error occurs during recovery, it takes 30 sec
                 * for the adapter to come out of the error state. Retry
                 * recovery after this interval.
                 */
                dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
-               delay = ERR_RECOVERY_RETRY_DELAY;
+               resched_delay = ERR_RECOVERY_RETRY_DELAY;
                goto reschedule_task;
        } else {
                dev_err(dev, "Adapter recovery failed\n");
+               dev_err(dev, "Please reboot server to recover\n");
        }
 
        return;
+
 reschedule_task:
-       be_schedule_err_detection(adapter, delay);
+       be_schedule_err_detection(adapter, resched_delay);
 }
 
 static void be_log_sfp_info(struct be_adapter *adapter)
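
For reference, the rescheduling decisions above collapse to the following
(all names from this patch):

	recovery result                      next step (delay)
	-----------------------------------  -------------------------------------
	0 (recovered)                        resume detection
	                                     (ERR_RECOVERY_DETECTION_DELAY)
	BEx/SH, resched_delay != 0           continue state machine (resched_delay)
	Lancer VF                            keep retrying (PF may still be
	                                     allocating resources)
	Lancer PF, retries remaining         retry after ERR_RECOVERY_RETRY_DELAY
	anything else                        give up; ask the user to reboot
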
@@ -5490,7 +5716,10 @@ static int be_drv_init(struct be_adapter *adapter)
        pci_save_state(adapter->pdev);
 
        INIT_DELAYED_WORK(&adapter->work, be_worker);
-       INIT_DELAYED_WORK(&adapter->be_err_detection_work,
+
+       adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
+       adapter->error_recovery.resched_delay = 0;
+       INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
                          be_err_detection_task);
 
        adapter->rx_fc = true;
@@ -5525,6 +5754,9 @@ static void be_remove(struct pci_dev *pdev)
 
        be_clear(adapter);
 
+       if (!pci_vfs_assigned(adapter->pdev))
+               be_cmd_reset_function(adapter);
+
        /* tell fw we're done with firing cmds */
        be_cmd_fw_clean(adapter);
 
@@ -5681,6 +5913,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
        be_roce_dev_add(adapter);
 
        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
+       adapter->error_recovery.probe_time = jiffies;
 
        /* On Die temperature not supported for VF. */
        if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
@@ -5926,6 +6159,8 @@ static struct pci_driver be_driver = {
 
 static int __init be_init_module(void)
 {
+       int status;
+
        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
            rx_frag_size != 2048) {
                printk(KERN_WARNING DRV_NAME
@@ -5945,7 +6180,17 @@ static int __init be_init_module(void)
                return -1;
        }
 
-       return pci_register_driver(&be_driver);
+       be_err_recovery_workq =
+               create_singlethread_workqueue("be_err_recover");
+       if (!be_err_recovery_workq)
+               pr_warn(DRV_NAME ": Could not create error recovery workqueue\n");
+
+       status = pci_register_driver(&be_driver);
+       if (status) {
+               destroy_workqueue(be_wq);
+               be_destroy_err_recovery_workq();
+       }
+       return status;
 }
 module_init(be_init_module);
 
@@ -5953,6 +6198,8 @@ static void __exit be_exit_module(void)
 {
        pci_unregister_driver(&be_driver);
 
+       be_destroy_err_recovery_workq();
+
        if (be_wq)
                destroy_workqueue(be_wq);
 }
index 8ddeedb..ddf0260 100644 (file)
@@ -192,7 +192,7 @@ struct fman_mac_params {
        /* A handle to the FM object this port related to */
        void *fm;
        /* MDIO exceptions interrupt source - not valid for all
-        * MACs; MUST be set to 'NO_IRQ' for MACs that don't have
+        * MACs; MUST be set to 0 for MACs that don't have
         * mdio-irq, or for polling
         */
        void *dev_id; /* device cookie used by the exception cbs */
index 61fd486..dc120c1 100644 (file)
@@ -60,6 +60,9 @@ module_param(fs_enet_debug, int, 0);
 MODULE_PARM_DESC(fs_enet_debug,
                 "Freescale bitmapped debugging message enable value");
 
+#define RX_RING_SIZE   32
+#define TX_RING_SIZE   64
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void fs_enet_netpoll(struct net_device *dev);
 #endif
@@ -79,8 +82,8 @@ static void skb_align(struct sk_buff *skb, int align)
                skb_reserve(skb, align - off);
 }
 
-/* NAPI receive function */
-static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
+/* NAPI function */
+static int fs_enet_napi(struct napi_struct *napi, int budget)
 {
        struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi);
        struct net_device *dev = fep->ndev;
@@ -90,9 +93,102 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
        int received = 0;
        u16 pkt_len, sc;
        int curidx;
+       int dirtyidx, do_wake, do_restart;
+       int tx_left = TX_RING_SIZE;
 
-       if (budget <= 0)
-               return received;
+       spin_lock(&fep->tx_lock);
+       bdp = fep->dirty_tx;
+
+       /* Clear status bits for NAPI */
+       (*fep->ops->napi_clear_event)(dev);
+
+       do_wake = do_restart = 0;
+       while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) {
+               dirtyidx = bdp - fep->tx_bd_base;
+
+               if (fep->tx_free == fep->tx_ring)
+                       break;
+
+               skb = fep->tx_skbuff[dirtyidx];
+
+               /*
+                * Check for errors.
+                */
+               if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC |
+                         BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) {
+
+                       if (sc & BD_ENET_TX_HB) /* No heartbeat */
+                               fep->stats.tx_heartbeat_errors++;
+                       if (sc & BD_ENET_TX_LC) /* Late collision */
+                               fep->stats.tx_window_errors++;
+                       if (sc & BD_ENET_TX_RL) /* Retrans limit */
+                               fep->stats.tx_aborted_errors++;
+                       if (sc & BD_ENET_TX_UN) /* Underrun */
+                               fep->stats.tx_fifo_errors++;
+                       if (sc & BD_ENET_TX_CSL)        /* Carrier lost */
+                               fep->stats.tx_carrier_errors++;
+
+                       if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) {
+                               fep->stats.tx_errors++;
+                               do_restart = 1;
+                       }
+               } else
+                       fep->stats.tx_packets++;
+
+               if (sc & BD_ENET_TX_READY) {
+                       dev_warn(fep->dev,
+                                "HEY! Enet xmit interrupt and TX_READY.\n");
+               }
+
+               /*
+                * Deferred means some collisions occurred during transmit,
+                * but we eventually sent the packet OK.
+                */
+               if (sc & BD_ENET_TX_DEF)
+                       fep->stats.collisions++;
+
+               /* unmap */
+               if (fep->mapped_as_page[dirtyidx])
+                       dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp),
+                                      CBDR_DATLEN(bdp), DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
+                                        CBDR_DATLEN(bdp), DMA_TO_DEVICE);
+
+               /*
+                * Free the sk buffer associated with this last transmit.
+                */
+               if (skb) {
+                       dev_kfree_skb(skb);
+                       fep->tx_skbuff[dirtyidx] = NULL;
+               }
+
+               /*
+                * Update pointer to next buffer descriptor to be transmitted.
+                */
+               if ((sc & BD_ENET_TX_WRAP) == 0)
+                       bdp++;
+               else
+                       bdp = fep->tx_bd_base;
+
+               /*
+                * Since we have freed up a buffer, the ring is no longer
+                * full.
+                */
+               if (++fep->tx_free == MAX_SKB_FRAGS)
+                       do_wake = 1;
+               tx_left--;
+       }
+
+       fep->dirty_tx = bdp;
+
+       if (do_restart)
+               (*fep->ops->tx_restart)(dev);
+
+       spin_unlock(&fep->tx_lock);
+
+       if (do_wake)
+               netif_wake_queue(dev);
 
        /*
         * First, grab all of the stats for the incoming packet.
@@ -100,10 +196,8 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
         */
        bdp = fep->cur_rx;
 
-       /* clear RX status bits for napi*/
-       (*fep->ops->napi_clear_rx_event)(dev);
-
-       while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) {
+       while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0 &&
+              received < budget) {
                curidx = bdp - fep->rx_bd_base;
 
                /*
@@ -132,21 +226,10 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
                        if (sc & BD_ENET_RX_OV)
                                fep->stats.rx_crc_errors++;
 
-                       skb = fep->rx_skbuff[curidx];
-
-                       dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-                               L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-                               DMA_FROM_DEVICE);
-
-                       skbn = skb;
-
+                       skbn = fep->rx_skbuff[curidx];
                } else {
                        skb = fep->rx_skbuff[curidx];
 
-                       dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-                               L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-                               DMA_FROM_DEVICE);
-
                        /*
                         * Process the incoming frame.
                         */
@@ -162,12 +245,30 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
                                        skb_copy_from_linear_data(skb,
                                                      skbn->data, pkt_len);
                                        swap(skb, skbn);
+                                       dma_sync_single_for_cpu(fep->dev,
+                                               CBDR_BUFADDR(bdp),
+                                               L1_CACHE_ALIGN(pkt_len),
+                                               DMA_FROM_DEVICE);
                                }
                        } else {
                                skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE);
 
-                               if (skbn)
+                               if (skbn) {
+                                       dma_addr_t dma;
+
                                        skb_align(skbn, ENET_RX_ALIGN);
+
+                                       dma_unmap_single(fep->dev,
+                                               CBDR_BUFADDR(bdp),
+                                               L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
+                                               DMA_FROM_DEVICE);
+
+                                       dma = dma_map_single(fep->dev,
+                                               skbn->data,
+                                               L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
+                                               DMA_FROM_DEVICE);
+                                       CBDW_BUFADDR(bdp, dma);
+                               }
                        }
 
                        if (skbn != NULL) {
@@ -182,9 +283,6 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
                }
 
                fep->rx_skbuff[curidx] = skbn;
-               CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data,
-                            L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-                            DMA_FROM_DEVICE));
                CBDW_DATLEN(bdp, 0);
                CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY);
 
@@ -197,134 +295,19 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
                        bdp = fep->rx_bd_base;
 
                (*fep->ops->rx_bd_done)(dev);
-
-               if (received >= budget)
-                       break;
        }
 
        fep->cur_rx = bdp;
 
-       if (received < budget) {
+       if (received < budget && tx_left) {
                /* done */
                napi_complete(napi);
-               (*fep->ops->napi_enable_rx)(dev);
-       }
-       return received;
-}
-
-static int fs_enet_tx_napi(struct napi_struct *napi, int budget)
-{
-       struct fs_enet_private *fep = container_of(napi, struct fs_enet_private,
-                                                  napi_tx);
-       struct net_device *dev = fep->ndev;
-       cbd_t __iomem *bdp;
-       struct sk_buff *skb;
-       int dirtyidx, do_wake, do_restart;
-       u16 sc;
-       int has_tx_work = 0;
-
-       spin_lock(&fep->tx_lock);
-       bdp = fep->dirty_tx;
-
-       /* clear TX status bits for napi*/
-       (*fep->ops->napi_clear_tx_event)(dev);
-
-       do_wake = do_restart = 0;
-       while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0) {
-               dirtyidx = bdp - fep->tx_bd_base;
-
-               if (fep->tx_free == fep->tx_ring)
-                       break;
-
-               skb = fep->tx_skbuff[dirtyidx];
-
-               /*
-                * Check for errors.
-                */
-               if (sc & (BD_ENET_TX_HB | BD_ENET_TX_LC |
-                         BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) {
-
-                       if (sc & BD_ENET_TX_HB) /* No heartbeat */
-                               fep->stats.tx_heartbeat_errors++;
-                       if (sc & BD_ENET_TX_LC) /* Late collision */
-                               fep->stats.tx_window_errors++;
-                       if (sc & BD_ENET_TX_RL) /* Retrans limit */
-                               fep->stats.tx_aborted_errors++;
-                       if (sc & BD_ENET_TX_UN) /* Underrun */
-                               fep->stats.tx_fifo_errors++;
-                       if (sc & BD_ENET_TX_CSL)        /* Carrier lost */
-                               fep->stats.tx_carrier_errors++;
-
-                       if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) {
-                               fep->stats.tx_errors++;
-                               do_restart = 1;
-                       }
-               } else
-                       fep->stats.tx_packets++;
-
-               if (sc & BD_ENET_TX_READY) {
-                       dev_warn(fep->dev,
-                                "HEY! Enet xmit interrupt and TX_READY.\n");
-               }
-
-               /*
-                * Deferred means some collisions occurred during transmit,
-                * but we eventually sent the packet OK.
-                */
-               if (sc & BD_ENET_TX_DEF)
-                       fep->stats.collisions++;
-
-               /* unmap */
-               if (fep->mapped_as_page[dirtyidx])
-                       dma_unmap_page(fep->dev, CBDR_BUFADDR(bdp),
-                                      CBDR_DATLEN(bdp), DMA_TO_DEVICE);
-               else
-                       dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-                                        CBDR_DATLEN(bdp), DMA_TO_DEVICE);
-
-               /*
-                * Free the sk buffer associated with this last transmit.
-                */
-               if (skb) {
-                       dev_kfree_skb(skb);
-                       fep->tx_skbuff[dirtyidx] = NULL;
-               }
-
-               /*
-                * Update pointer to next buffer descriptor to be transmitted.
-                */
-               if ((sc & BD_ENET_TX_WRAP) == 0)
-                       bdp++;
-               else
-                       bdp = fep->tx_bd_base;
-
-               /*
-                * Since we have freed up a buffer, the ring is no longer
-                * full.
-                */
-               if (++fep->tx_free >= MAX_SKB_FRAGS)
-                       do_wake = 1;
-               has_tx_work = 1;
-       }
-
-       fep->dirty_tx = bdp;
-
-       if (do_restart)
-               (*fep->ops->tx_restart)(dev);
+               (*fep->ops->napi_enable)(dev);
 
-       if (!has_tx_work) {
-               napi_complete(napi);
-               (*fep->ops->napi_enable_tx)(dev);
+               return received;
        }
 
-       spin_unlock(&fep->tx_lock);
-
-       if (do_wake)
-               netif_wake_queue(dev);
-
-       if (has_tx_work)
-               return budget;
-       return 0;
+       return budget;
 }
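
The merged poller must honour the usual NAPI contract: complete and re-enable
device interrupts only once both directions are fully drained, otherwise return
the whole budget so the core keeps polling. A generic sketch of that pattern
(hypothetical helper names, not driver code):

	static int my_poll(struct napi_struct *napi, int budget)
	{
		int received = clean_rx_ring(napi, budget);	/* hypothetical */
		bool tx_done = clean_tx_ring(napi);		/* hypothetical */

		if (received < budget && tx_done) {
			napi_complete(napi);
			reenable_device_irqs(napi);		/* hypothetical */
			return received;
		}
		return budget;	/* not done yet; stay on the poll list */
	}
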
 
 /*
@@ -350,18 +333,18 @@ fs_enet_interrupt(int irq, void *dev_id)
                nr++;
 
                int_clr_events = int_events;
-               int_clr_events &= ~fep->ev_napi_rx;
+               int_clr_events &= ~fep->ev_napi;
 
                (*fep->ops->clear_int_events)(dev, int_clr_events);
 
                if (int_events & fep->ev_err)
                        (*fep->ops->ev_error)(dev, int_events);
 
-               if (int_events & fep->ev_rx) {
+               if (int_events & fep->ev) {
                        napi_ok = napi_schedule_prep(&fep->napi);
 
-                       (*fep->ops->napi_disable_rx)(dev);
-                       (*fep->ops->clear_int_events)(dev, fep->ev_napi_rx);
+                       (*fep->ops->napi_disable)(dev);
+                       (*fep->ops->clear_int_events)(dev, fep->ev_napi);
 
                        /* NOTE: it is possible for FCCs in NAPI mode    */
                        /* to submit a spurious interrupt while in poll  */
@@ -369,17 +352,6 @@ fs_enet_interrupt(int irq, void *dev_id)
                                __napi_schedule(&fep->napi);
                }
 
-               if (int_events & fep->ev_tx) {
-                       napi_ok = napi_schedule_prep(&fep->napi_tx);
-
-                       (*fep->ops->napi_disable_tx)(dev);
-                       (*fep->ops->clear_int_events)(dev, fep->ev_napi_tx);
-
-                       /* NOTE: it is possible for FCCs in NAPI mode    */
-                       /* to submit a spurious interrupt while in poll  */
-                       if (napi_ok)
-                               __napi_schedule(&fep->napi_tx);
-               }
        }
 
        handled = nr > 0;
@@ -659,7 +631,8 @@ static void fs_timeout(struct net_device *dev)
        }
 
        phy_start(dev->phydev);
-       wake = fep->tx_free && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY);
+       wake = fep->tx_free >= MAX_SKB_FRAGS &&
+              !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY);
        spin_unlock_irqrestore(&fep->lock, flags);
 
        if (wake)
@@ -751,11 +724,10 @@ static int fs_enet_open(struct net_device *dev)
        int err;
 
        /* to initialize the fep->cur_rx,... */
-       /* not doing this, will cause a crash in fs_enet_rx_napi */
+       /* not doing this will cause a crash in fs_enet_napi */
        fs_init_bds(fep->ndev);
 
        napi_enable(&fep->napi);
-       napi_enable(&fep->napi_tx);
 
        /* Install our interrupt handler. */
        r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED,
@@ -763,7 +735,6 @@ static int fs_enet_open(struct net_device *dev)
        if (r != 0) {
                dev_err(fep->dev, "Could not allocate FS_ENET IRQ!");
                napi_disable(&fep->napi);
-               napi_disable(&fep->napi_tx);
                return -EINVAL;
        }
 
@@ -771,7 +742,6 @@ static int fs_enet_open(struct net_device *dev)
        if (err) {
                free_irq(fep->interrupt, dev);
                napi_disable(&fep->napi);
-               napi_disable(&fep->napi_tx);
                return err;
        }
        phy_start(dev->phydev);
@@ -789,7 +759,6 @@ static int fs_enet_close(struct net_device *dev)
        netif_stop_queue(dev);
        netif_carrier_off(dev);
        napi_disable(&fep->napi);
-       napi_disable(&fep->napi_tx);
        phy_stop(dev->phydev);
 
        spin_lock_irqsave(&fep->lock, flags);
@@ -861,6 +830,44 @@ static void fs_set_msglevel(struct net_device *dev, u32 value)
        fep->msg_enable = value;
 }
 
+static int fs_get_tunable(struct net_device *dev,
+                         const struct ethtool_tunable *tuna, void *data)
+{
+       struct fs_enet_private *fep = netdev_priv(dev);
+       struct fs_platform_info *fpi = fep->fpi;
+       int ret = 0;
+
+       switch (tuna->id) {
+       case ETHTOOL_RX_COPYBREAK:
+               *(u32 *)data = fpi->rx_copybreak;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static int fs_set_tunable(struct net_device *dev,
+                         const struct ethtool_tunable *tuna, const void *data)
+{
+       struct fs_enet_private *fep = netdev_priv(dev);
+       struct fs_platform_info *fpi = fep->fpi;
+       int ret = 0;
+
+       switch (tuna->id) {
+       case ETHTOOL_RX_COPYBREAK:
+               fpi->rx_copybreak = *(u32 *)data;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
 static const struct ethtool_ops fs_ethtool_ops = {
        .get_drvinfo = fs_get_drvinfo,
        .get_regs_len = fs_get_regs_len,
@@ -872,6 +879,8 @@ static const struct ethtool_ops fs_ethtool_ops = {
        .get_ts_info = ethtool_op_get_ts_info,
        .get_link_ksettings = phy_ethtool_get_link_ksettings,
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
+       .get_tunable = fs_get_tunable,
+       .set_tunable = fs_set_tunable,
 };
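
fpi->rx_copybreak is the threshold below which a received frame is copied into a
right-sized skb so the DMA-mapped ring buffer can stay in place (240 bytes by
default, set in fs_enet_probe() below). With the two tunable callbacks
registered it becomes adjustable at run time, e.g. with
"ethtool --get-tunable ethX rx-copybreak" and
"ethtool --set-tunable ethX rx-copybreak 200" (stock ethtool tunable syntax;
interface name and value are placeholders).
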
 
 static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -939,8 +948,8 @@ static int fs_enet_probe(struct platform_device *ofdev)
                fpi->cp_command = *data;
        }
 
-       fpi->rx_ring = 32;
-       fpi->tx_ring = 64;
+       fpi->rx_ring = RX_RING_SIZE;
+       fpi->tx_ring = TX_RING_SIZE;
        fpi->rx_copybreak = 240;
        fpi->napi_weight = 17;
        fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0);
@@ -1024,8 +1033,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 
        ndev->netdev_ops = &fs_enet_netdev_ops;
        ndev->watchdog_timeo = 2 * HZ;
-       netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight);
-       netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2);
+       netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight);
 
        ndev->ethtool_ops = &fs_ethtool_ops;
 
index e29f54a..fee24c8 100644 (file)
@@ -81,12 +81,9 @@ struct fs_ops {
        void (*adjust_link)(struct net_device *dev);
        void (*restart)(struct net_device *dev);
        void (*stop)(struct net_device *dev);
-       void (*napi_clear_rx_event)(struct net_device *dev);
-       void (*napi_enable_rx)(struct net_device *dev);
-       void (*napi_disable_rx)(struct net_device *dev);
-       void (*napi_clear_tx_event)(struct net_device *dev);
-       void (*napi_enable_tx)(struct net_device *dev);
-       void (*napi_disable_tx)(struct net_device *dev);
+       void (*napi_clear_event)(struct net_device *dev);
+       void (*napi_enable)(struct net_device *dev);
+       void (*napi_disable)(struct net_device *dev);
        void (*rx_bd_done)(struct net_device *dev);
        void (*tx_kickstart)(struct net_device *dev);
        u32 (*get_int_events)(struct net_device *dev);
@@ -122,7 +119,6 @@ struct phy_info {
 
 struct fs_enet_private {
        struct napi_struct napi;
-       struct napi_struct napi_tx;
        struct device *dev;     /* pointer back to the device (must be initialized first) */
        struct net_device *ndev;
        spinlock_t lock;        /* during all ops except TX pckt processing */
@@ -152,10 +148,8 @@ struct fs_enet_private {
        int oldduplex, oldspeed, oldlink;       /* current settings */
 
        /* event masks */
-       u32 ev_napi_rx;         /* mask of NAPI rx events */
-       u32 ev_napi_tx;         /* mask of NAPI rx events */
-       u32 ev_rx;              /* rx event mask          */
-       u32 ev_tx;              /* tx event mask          */
+       u32 ev_napi;            /* mask of NAPI events */
+       u32 ev;                 /* event mask          */
        u32 ev_err;             /* error event mask       */
 
        u16 bd_rx_empty;        /* mask of BD rx empty    */
index d71761a..120c758 100644 (file)
@@ -90,7 +90,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
        int ret = -EINVAL;
 
        fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-       if (fep->interrupt == NO_IRQ)
+       if (!fep->interrupt)
                goto out;
 
        fep->fcc.fccp = of_iomap(ofdev->dev.of_node, 0);
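
irq_of_parse_and_map() returns 0 when no mapping can be made, so !fep->interrupt
is the portable failure check. NO_IRQ has architecture-dependent values (0 on
powerpc, historically -1 on some other architectures) and is being removed
tree-wide, which is why the same substitution repeats in the FEC and SCC
variants below.
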
@@ -124,10 +124,8 @@ out:
        return ret;
 }
 
-#define FCC_NAPI_RX_EVENT_MSK  (FCC_ENET_RXF | FCC_ENET_RXB)
-#define FCC_NAPI_TX_EVENT_MSK  (FCC_ENET_TXB)
-#define FCC_RX_EVENT           (FCC_ENET_RXF)
-#define FCC_TX_EVENT           (FCC_ENET_TXB)
+#define FCC_NAPI_EVENT_MSK     (FCC_ENET_RXF | FCC_ENET_RXB | FCC_ENET_TXB)
+#define FCC_EVENT              (FCC_ENET_RXF | FCC_ENET_TXB)
 #define FCC_ERR_EVENT_MSK      (FCC_ENET_TXE)
 
 static int setup_data(struct net_device *dev)
@@ -137,10 +135,8 @@ static int setup_data(struct net_device *dev)
        if (do_pd_setup(fep) != 0)
                return -EINVAL;
 
-       fep->ev_napi_rx = FCC_NAPI_RX_EVENT_MSK;
-       fep->ev_napi_tx = FCC_NAPI_TX_EVENT_MSK;
-       fep->ev_rx = FCC_RX_EVENT;
-       fep->ev_tx = FCC_TX_EVENT;
+       fep->ev_napi = FCC_NAPI_EVENT_MSK;
+       fep->ev = FCC_EVENT;
        fep->ev_err = FCC_ERR_EVENT_MSK;
 
        return 0;
@@ -424,52 +420,28 @@ static void stop(struct net_device *dev)
        fs_cleanup_bds(dev);
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        fcc_t __iomem *fccp = fep->fcc.fccp;
 
-       W16(fccp, fcc_fcce, FCC_NAPI_RX_EVENT_MSK);
+       W16(fccp, fcc_fcce, FCC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        fcc_t __iomem *fccp = fep->fcc.fccp;
 
-       S16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK);
+       S16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        fcc_t __iomem *fccp = fep->fcc.fccp;
 
-       C16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       fcc_t __iomem *fccp = fep->fcc.fccp;
-
-       W16(fccp, fcc_fcce, FCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       fcc_t __iomem *fccp = fep->fcc.fccp;
-
-       S16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       fcc_t __iomem *fccp = fep->fcc.fccp;
-
-       C16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK);
+       C16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -595,12 +567,9 @@ const struct fs_ops fs_fcc_ops = {
        .set_multicast_list     = set_multicast_list,
        .restart                = restart,
        .stop                   = stop,
-       .napi_clear_rx_event    = napi_clear_rx_event,
-       .napi_enable_rx         = napi_enable_rx,
-       .napi_disable_rx        = napi_disable_rx,
-       .napi_clear_tx_event    = napi_clear_tx_event,
-       .napi_enable_tx         = napi_enable_tx,
-       .napi_disable_tx        = napi_disable_tx,
+       .napi_clear_event       = napi_clear_event_fs,
+       .napi_enable            = napi_enable_fs,
+       .napi_disable           = napi_disable_fs,
        .rx_bd_done             = rx_bd_done,
        .tx_kickstart           = tx_kickstart,
        .get_int_events         = get_int_events,
index 35a318e..777beff 100644 (file)
@@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
        struct platform_device *ofdev = to_platform_device(fep->dev);
 
        fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-       if (fep->interrupt == NO_IRQ)
+       if (!fep->interrupt)
                return -EINVAL;
 
        fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0);
@@ -109,10 +109,8 @@ static int do_pd_setup(struct fs_enet_private *fep)
        return 0;
 }
 
-#define FEC_NAPI_RX_EVENT_MSK  (FEC_ENET_RXF | FEC_ENET_RXB)
-#define FEC_NAPI_TX_EVENT_MSK  (FEC_ENET_TXF)
-#define FEC_RX_EVENT           (FEC_ENET_RXF)
-#define FEC_TX_EVENT           (FEC_ENET_TXF)
+#define FEC_NAPI_EVENT_MSK     (FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_TXF)
+#define FEC_EVENT              (FEC_ENET_RXF | FEC_ENET_TXF)
 #define FEC_ERR_EVENT_MSK      (FEC_ENET_HBERR | FEC_ENET_BABR | \
                                 FEC_ENET_BABT | FEC_ENET_EBERR)
 
@@ -126,10 +124,8 @@ static int setup_data(struct net_device *dev)
        fep->fec.hthi = 0;
        fep->fec.htlo = 0;
 
-       fep->ev_napi_rx = FEC_NAPI_RX_EVENT_MSK;
-       fep->ev_napi_tx = FEC_NAPI_TX_EVENT_MSK;
-       fep->ev_rx = FEC_RX_EVENT;
-       fep->ev_tx = FEC_TX_EVENT;
+       fep->ev_napi = FEC_NAPI_EVENT_MSK;
+       fep->ev = FEC_EVENT;
        fep->ev_err = FEC_ERR_EVENT_MSK;
 
        return 0;
@@ -396,52 +392,28 @@ static void stop(struct net_device *dev)
        }
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        struct fec __iomem *fecp = fep->fec.fecp;
 
-       FW(fecp, ievent, FEC_NAPI_RX_EVENT_MSK);
+       FW(fecp, ievent, FEC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        struct fec __iomem *fecp = fep->fec.fecp;
 
-       FS(fecp, imask, FEC_NAPI_RX_EVENT_MSK);
+       FS(fecp, imask, FEC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        struct fec __iomem *fecp = fep->fec.fecp;
 
-       FC(fecp, imask, FEC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       struct fec __iomem *fecp = fep->fec.fecp;
-
-       FW(fecp, ievent, FEC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       struct fec __iomem *fecp = fep->fec.fecp;
-
-       FS(fecp, imask, FEC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       struct fec __iomem *fecp = fep->fec.fecp;
-
-       FC(fecp, imask, FEC_NAPI_TX_EVENT_MSK);
+       FC(fecp, imask, FEC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -513,12 +485,9 @@ const struct fs_ops fs_fec_ops = {
        .set_multicast_list     = set_multicast_list,
        .restart                = restart,
        .stop                   = stop,
-       .napi_clear_rx_event    = napi_clear_rx_event,
-       .napi_enable_rx         = napi_enable_rx,
-       .napi_disable_rx        = napi_disable_rx,
-       .napi_clear_tx_event    = napi_clear_tx_event,
-       .napi_enable_tx         = napi_enable_tx,
-       .napi_disable_tx        = napi_disable_tx,
+       .napi_clear_event       = napi_clear_event_fs,
+       .napi_enable            = napi_enable_fs,
+       .napi_disable           = napi_disable_fs,
        .rx_bd_done             = rx_bd_done,
        .tx_kickstart           = tx_kickstart,
        .get_int_events         = get_int_events,
index e8b9c33..15abd37 100644 (file)
@@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
        struct platform_device *ofdev = to_platform_device(fep->dev);
 
        fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-       if (fep->interrupt == NO_IRQ)
+       if (!fep->interrupt)
                return -EINVAL;
 
        fep->scc.sccp = of_iomap(ofdev->dev.of_node, 0);
@@ -115,10 +115,8 @@ static int do_pd_setup(struct fs_enet_private *fep)
        return 0;
 }
 
-#define SCC_NAPI_RX_EVENT_MSK  (SCCE_ENET_RXF | SCCE_ENET_RXB)
-#define SCC_NAPI_TX_EVENT_MSK  (SCCE_ENET_TXB)
-#define SCC_RX_EVENT           (SCCE_ENET_RXF)
-#define SCC_TX_EVENT           (SCCE_ENET_TXB)
+#define SCC_NAPI_EVENT_MSK     (SCCE_ENET_RXF | SCCE_ENET_RXB | SCCE_ENET_TXB)
+#define SCC_EVENT              (SCCE_ENET_RXF | SCCE_ENET_TXB)
 #define SCC_ERR_EVENT_MSK      (SCCE_ENET_TXE | SCCE_ENET_BSY)
 
 static int setup_data(struct net_device *dev)
@@ -130,10 +128,8 @@ static int setup_data(struct net_device *dev)
        fep->scc.hthi = 0;
        fep->scc.htlo = 0;
 
-       fep->ev_napi_rx = SCC_NAPI_RX_EVENT_MSK;
-       fep->ev_napi_tx = SCC_NAPI_TX_EVENT_MSK;
-       fep->ev_rx = SCC_RX_EVENT;
-       fep->ev_tx = SCC_TX_EVENT | SCCE_ENET_TXE;
+       fep->ev_napi = SCC_NAPI_EVENT_MSK;
+       fep->ev = SCC_EVENT | SCCE_ENET_TXE;
        fep->ev_err = SCC_ERR_EVENT_MSK;
 
        return 0;
@@ -379,52 +375,28 @@ static void stop(struct net_device *dev)
        fs_cleanup_bds(dev);
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        scc_t __iomem *sccp = fep->scc.sccp;
 
-       W16(sccp, scc_scce, SCC_NAPI_RX_EVENT_MSK);
+       W16(sccp, scc_scce, SCC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        scc_t __iomem *sccp = fep->scc.sccp;
 
-       S16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK);
+       S16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
        scc_t __iomem *sccp = fep->scc.sccp;
 
-       C16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       scc_t __iomem *sccp = fep->scc.sccp;
-
-       W16(sccp, scc_scce, SCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       scc_t __iomem *sccp = fep->scc.sccp;
-
-       S16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-       struct fs_enet_private *fep = netdev_priv(dev);
-       scc_t __iomem *sccp = fep->scc.sccp;
-
-       C16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK);
+       C16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -497,12 +469,9 @@ const struct fs_ops fs_scc_ops = {
        .set_multicast_list     = set_multicast_list,
        .restart                = restart,
        .stop                   = stop,
-       .napi_clear_rx_event    = napi_clear_rx_event,
-       .napi_enable_rx         = napi_enable_rx,
-       .napi_disable_rx        = napi_disable_rx,
-       .napi_clear_tx_event    = napi_clear_tx_event,
-       .napi_enable_tx         = napi_enable_tx,
-       .napi_disable_tx        = napi_disable_tx,
+       .napi_clear_event       = napi_clear_event_fs,
+       .napi_enable            = napi_enable_fs,
+       .napi_disable           = napi_disable_fs,
        .rx_bd_done             = rx_bd_done,
        .tx_kickstart           = tx_kickstart,
        .get_int_events         = get_int_events,
index f3c63dc..446c7b3 100644 (file)
@@ -195,7 +195,7 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus)
        return 0;
 }
 
-#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+#if IS_ENABLED(CONFIG_GIANFAR)
 /*
  * Return the TBIPA address, starting from the address
  * of the mapped GFAR MDIO registers (struct gfar)
@@ -228,7 +228,7 @@ static uint32_t __iomem *get_etsec_tbipa(void __iomem *p)
 }
 #endif
 
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+#if IS_ENABLED(CONFIG_UCC_GETH)
 /*
  * Return the TBIPAR address for a QE MDIO node, starting from the address
  * of the mapped MII registers (struct fsl_pq_mii)
@@ -306,7 +306,7 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
 #endif
 
 static const struct of_device_id fsl_pq_mdio_match[] = {
-#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+#if IS_ENABLED(CONFIG_GIANFAR)
        {
                .compatible = "fsl,gianfar-tbi",
                .data = &(struct fsl_pq_mdio_data) {
@@ -344,7 +344,7 @@ static const struct of_device_id fsl_pq_mdio_match[] = {
                },
        },
 #endif
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+#if IS_ENABLED(CONFIG_UCC_GETH)
        {
                .compatible = "fsl,ucc-mdio",
                .data = &(struct fsl_pq_mdio_data) {
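
The IS_ENABLED() conversions are behavior-preserving: for a tristate option,
IS_ENABLED(CONFIG_FOO) evaluates to 1 when CONFIG_FOO is built in (=y) or
built as a module (=m), which is exactly what the old two-macro test spelled
out:

        #if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)  /* before */
        #endif

        #if IS_ENABLED(CONFIG_GIANFAR)  /* after: true for =y or =m */
        #endif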
index d20935d..4b4f5bc 100644 (file)
@@ -2922,17 +2922,25 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
 {
        unsigned int size = lstatus & BD_LENGTH_MASK;
        struct page *page = rxb->page;
+       bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
 
        /* Remove the FCS from the packet length */
-       if (likely(lstatus & BD_LFLAG(RXBD_LAST)))
+       if (last)
                size -= ETH_FCS_LEN;
 
-       if (likely(first))
+       if (likely(first)) {
                skb_put(skb, size);
-       else
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                               rxb->page_offset + RXBUF_ALIGNMENT,
-                               size, GFAR_RXB_TRUESIZE);
+       } else {
+               /* the last fragment's length contains the full frame length */
+               if (last)
+                       size -= skb->len;
+
+               /* in case the last fragment consisted only of the FCS */
+               if (size > 0)
+                       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+                                       rxb->page_offset + RXBUF_ALIGNMENT,
+                                       size, GFAR_RXB_TRUESIZE);
+       }
 
        /* try reuse page */
        if (unlikely(page_count(page) != 1))
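
To illustrate the new arithmetic: with 1536-byte RX buffers, a 1540-byte
frame (1536 bytes of data plus the 4-byte FCS) is split by hardware into a
full first buffer and an FCS-only last fragment. The last descriptor reports
the full frame length, so size = 1540 - ETH_FCS_LEN - skb->len =
1540 - 4 - 1536 = 0, and the empty fragment is now skipped instead of being
added to the skb.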
index 373fd09..6e8a9c8 100644 (file)
@@ -100,7 +100,8 @@ extern const char gfar_driver_version[];
 #define DEFAULT_RX_LFC_THR  16
 #define DEFAULT_LFC_PTVVAL  4
 
-#define GFAR_RXB_SIZE 1536
+/* prevent fragmentation by HW in DSA environments */
+#define GFAR_RXB_SIZE roundup(1536 + 8, 64)
 #define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \
                          + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 #define GFAR_RXB_TRUESIZE 2048
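
The new buffer size works out to roundup(1536 + 8, 64) = roundup(1544, 64) =
1600 bytes, since 24 * 64 = 1536 < 1544 <= 25 * 64 = 1600; the extra
headroom keeps DSA-tagged frames from being split across buffers by the
hardware.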
index 5bf1ade..186ef8f 100644 (file)
@@ -3756,7 +3756,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
                        return -EINVAL;
                }
                if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) {
-                       pr_err("invalid rx-clock propperty\n");
+                       pr_err("invalid rx-clock property\n");
                        return -EINVAL;
                }
                ug_info->uf_info.rx_clock = *prop;
index 0c4afe9..a90ab40 100644 (file)
@@ -755,7 +755,7 @@ static void hip04_get_drvinfo(struct net_device *netdev,
        strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
 }
 
-static struct ethtool_ops hip04_ethtool_ops = {
+static const struct ethtool_ops hip04_ethtool_ops = {
        .get_coalesce           = hip04_get_coalesce,
        .set_coalesce           = hip04_set_coalesce,
        .get_drvinfo            = hip04_get_drvinfo,
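
The same constification repeats for the HiSilicon drivers below: an
ethtool_ops table is only ever read through the netdev pointer, so it can
live in rodata. A hypothetical minimal example:

        static const struct ethtool_ops example_ethtool_ops = {
                .get_link = ethtool_op_get_link,
        };

        static void example_setup(struct net_device *netdev)
        {
                /* the core only reads through this pointer */
                netdev->ethtool_ops = &example_ethtool_ops;
        }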
index b5d7ad0..ca68e22 100644 (file)
@@ -699,7 +699,7 @@ static int hisi_femac_net_ioctl(struct net_device *dev,
        return phy_mii_ioctl(dev->phydev, ifreq, cmd);
 }
 
-static struct ethtool_ops hisi_femac_ethtools_ops = {
+static const struct ethtool_ops hisi_femac_ethtools_ops = {
        .get_link               = ethtool_op_get_link,
        .get_link_ksettings     = phy_ethtool_get_link_ksettings,
        .set_link_ksettings     = phy_ethtool_set_link_ksettings,
index 275618b..e69a6be 100644 (file)
@@ -750,7 +750,7 @@ static const struct net_device_ops hix5hd2_netdev_ops = {
        .ndo_set_mac_address    = hix5hd2_net_set_mac_address,
 };
 
-static struct ethtool_ops hix5hd2_ethtools_ops = {
+static const struct ethtool_ops hix5hd2_ethtools_ops = {
        .get_link               = ethtool_op_get_link,
        .get_link_ksettings     = phy_ethtool_get_link_ksettings,
        .set_link_ksettings     = phy_ethtool_set_link_ksettings,
index afb5daa..eb448df 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 
@@ -115,10 +116,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 
                        dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev,
                                                                  res);
-                       if (IS_ERR(dsaf_dev->sc_base)) {
-                               dev_err(dsaf_dev->dev, "subctrl can not map!\n");
+                       if (IS_ERR(dsaf_dev->sc_base))
                                return PTR_ERR(dsaf_dev->sc_base);
-                       }
 
                        res = platform_get_resource(pdev, IORESOURCE_MEM,
                                                    res_idx++);
@@ -129,10 +128,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
 
                        dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev,
                                                                   res);
-                       if (IS_ERR(dsaf_dev->sds_base)) {
-                               dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n");
+                       if (IS_ERR(dsaf_dev->sds_base))
                                return PTR_ERR(dsaf_dev->sds_base);
-                       }
                } else {
                        dsaf_dev->sub_ctrl = syscon;
                }
@@ -147,10 +144,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
                }
        }
        dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(dsaf_dev->ppe_base)) {
-               dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n");
+       if (IS_ERR(dsaf_dev->ppe_base))
                return PTR_ERR(dsaf_dev->ppe_base);
-       }
        dsaf_dev->ppe_paddr = res->start;
 
        if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
@@ -166,10 +161,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
                        }
                }
                dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res);
-               if (IS_ERR(dsaf_dev->io_base)) {
-                       dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n");
+               if (IS_ERR(dsaf_dev->io_base))
                        return PTR_ERR(dsaf_dev->io_base);
-               }
        }
 
        ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num);
@@ -2781,6 +2774,89 @@ static struct platform_driver g_dsaf_driver = {
 
 module_platform_driver(g_dsaf_driver);
 
+/**
+ * hns_dsaf_roce_reset - reset dsaf and roce
+ * @dsaf_fwnode: Pointer to framework node for the dsaf
+ * @enable: false - request reset, true - drop reset
+ * return 0 - success, negative - fail
+ */
+int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable)
+{
+       struct dsaf_device *dsaf_dev;
+       struct platform_device *pdev;
+       u32 mp;
+       u32 sl;
+       u32 credit;
+       int i;
+       const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+               {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
+               {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
+               {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
+               {DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
+               {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1},
+               {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1},
+               {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
+               {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
+       };
+       const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3},
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
+               {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
+               {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
+               {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3},
+       };
+
+       if (!is_of_node(dsaf_fwnode)) {
+               pr_err("hisi_dsaf: Only support DT node!\n");
+               return -EINVAL;
+       }
+       pdev = of_find_device_by_node(to_of_node(dsaf_fwnode));
+       dsaf_dev = dev_get_drvdata(&pdev->dev);
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+               dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n",
+                       dsaf_dev->ae_dev.name);
+               return -ENODEV;
+       }
+
+       if (!enable) {
+               /* Reset rocee-channels in dsaf and rocee */
+               hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false);
+               hns_dsaf_roce_srst(dsaf_dev, false);
+       } else {
+               /* Configure dsaf tx roce corresponding to port map and sl map */
+               mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG);
+               for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++)
+                       dsaf_set_field(mp, 7 << i * 3, i * 3,
+                                      port_map[i][DSAF_ROCE_6PORT_MODE]);
+               dsaf_set_field(mp, 3 << i * 3, i * 3, 0);
+               dsaf_write_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG, mp);
+
+               sl = dsaf_read_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG);
+               for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++)
+                       dsaf_set_field(sl, 3 << i * 2, i * 2,
+                                      sl_map[i][DSAF_ROCE_6PORT_MODE]);
+               dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl);
+
+               /* De-reset rocee-channels in dsaf and rocee */
+               hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true);
+               msleep(SRST_TIME_INTERVAL);
+               hns_dsaf_roce_srst(dsaf_dev, true);
+
+               /* Enable dsaf channel rocee credit */
+               credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG);
+               dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0);
+               dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
+
+               dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1);
+               dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(hns_dsaf_roce_reset);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
 MODULE_DESCRIPTION("HNS DSAF driver");
index 1daf018..f3681d5 100644 (file)
@@ -43,6 +43,32 @@ struct hns_mac_cb;
 #define DSAF_PRIO_NR   8
 #define DSAF_REG_PER_ZONE      3
 
+#define DSAF_ROCE_CREDIT_CHN   8
+#define DSAF_ROCE_CHAN_MODE    3
+
+enum dsaf_roce_port_mode {
+       DSAF_ROCE_6PORT_MODE,
+       DSAF_ROCE_4PORT_MODE,
+       DSAF_ROCE_2PORT_MODE,
+       DSAF_ROCE_CHAN_MODE_NUM,
+};
+
+enum dsaf_roce_port_num {
+       DSAF_ROCE_PORT_0,
+       DSAF_ROCE_PORT_1,
+       DSAF_ROCE_PORT_2,
+       DSAF_ROCE_PORT_3,
+       DSAF_ROCE_PORT_4,
+       DSAF_ROCE_PORT_5,
+};
+
+enum dsaf_roce_qos_sl {
+       DSAF_ROCE_SL_0,
+       DSAF_ROCE_SL_1,
+       DSAF_ROCE_SL_2,
+       DSAF_ROCE_SL_3,
+};
+
 #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
 #define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP)
 
@@ -419,6 +445,10 @@ int hns_dsaf_get_mac_entry_by_index(
 
 void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb);
 
+void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable);
+
+void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable);
+
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev);
 void hns_dsaf_ae_uninit(struct dsaf_device *dsaf_dev);
 
index 611b67b..36b9f79 100644 (file)
@@ -231,6 +231,42 @@ static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev,
        dsaf_write_sub(dsaf_dev, reg_addr, reg_val);
 }
 
+/**
+ * hns_dsaf_srst_chns - reset dsaf channels
+ * @dsaf_dev: dsaf device struct pointer
+ * @msk: xbar channels mask value:
+ * bit0-5 for xge0-5
+ * bit6-11 for ppe0-5
+ * bit12-17 for roce0-5
+ * bit18-19 for com/dfx
+ * @enable: false - request reset, true - drop reset
+ */
+void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable)
+{
+       u32 reg_addr;
+
+       if (!enable)
+               reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG;
+       else
+               reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG;
+
+       dsaf_write_sub(dsaf_dev, reg_addr, msk);
+}
+
+void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable)
+{
+       if (!enable) {
+               dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1);
+       } else {
+               dsaf_write_sub(dsaf_dev,
+                              DSAF_SUB_SC_ROCEE_CLK_DIS_REG, 1);
+               dsaf_write_sub(dsaf_dev,
+                              DSAF_SUB_SC_ROCEE_RESET_DREQ_REG, 1);
+               msleep(20);
+               dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_CLK_EN_REG, 1);
+       }
+}
+
 static void
 hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
                                    u32 port, bool dereset)
index ff8b6a4..6ea8722 100644 (file)
@@ -328,9 +328,10 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb)
 {
        u32 port;
-       struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
 
        if (ppe_cb->ppe_common_cb) {
+               struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
+
                port = ppe_cb->index;
                dsaf_dev->misc_op->ppe_srst(dsaf_dev, port, 0);
        }
index 235f744..13c16ab 100644 (file)
 #define DSAF_SUB_SC_PPE_RESET_DREQ_REG                 0xA4C
 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG          0xA88
 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG         0xA8C
+#define DSAF_SUB_SC_DSAF_RESET_REQ_REG                 0xAA8
+#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG                        0xA50
+#define DSAF_SUB_SC_DSAF_RESET_DREQ_REG                        0xAAC
+#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG                  0x32C
+#define DSAF_SUB_SC_ROCEE_RESET_DREQ_REG               0xA54
+#define DSAF_SUB_SC_ROCEE_CLK_EN_REG                   0x328
 #define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG         0x2060
 #define DSAF_SUB_SC_TCAM_MBIST_EN_REG                  0x2300
 #define DSAF_SUB_SC_DSAF_CLK_ST_REG                    0x5300
 #define DSAF_ROCEE_INT_STS_0_REG       0x200
 #define DSAFV2_SERDES_LBK_0_REG         0x220
 #define DSAF_PAUSE_CFG_REG             0x240
+#define DSAF_ROCE_PORT_MAP_REG         0x2A0
+#define DSAF_ROCE_SL_MAP_REG           0x2A4
 #define DSAF_PPE_QID_CFG_0_REG         0x300
 #define DSAF_SW_PORT_TYPE_0_REG                0x320
 #define DSAF_STP_PORT_TYPE_0_REG       0x340
 #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG                0x200C
 #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG                0x230C
 #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG      0x260C
+#define DSAF_SBM_ROCEE_CFG_REG_REG             0x2380
 #define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG    0x238C
 #define DSAF_SBM_FREE_CNT_0_0_REG              0x2010
 #define DSAF_SBM_FREE_CNT_1_0_REG              0x2014
 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9
 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9)
 
+#define DSAF_CHNS_MASK                 0x3f000
+#define DSAF_SBM_ROCEE_CFG_CRD_EN_B    2
+#define SRST_TIME_INTERVAL             20
 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_S 0
 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_M (((1ULL << 8) - 1) << 0)
 #define DSAFV2_SBM_CFG2_ROCEE_RESET_BUF_NUM_S 8
index ab33487..5eb3245 100644 (file)
@@ -1264,7 +1264,7 @@ static int hns_get_rxnfc(struct net_device *netdev,
        return 0;
 }
 
-static struct ethtool_ops hns_ethtool_ops = {
+static const struct ethtool_ops hns_ethtool_ops = {
        .get_drvinfo = hns_nic_get_drvinfo,
        .get_link  = hns_nic_get_link,
        .get_settings  = hns_nic_get_settings,
index befb4ac..ce235b7 100644 (file)
@@ -89,10 +89,10 @@ static char version[] __initdata =
 #define DEB(x,y)       if (i596_debug & (x)) y
 
 
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_MVME16x_NET_MODULE)
+#if IS_ENABLED(CONFIG_MVME16x_NET)
 #define ENABLE_MVME16x_NET
 #endif
-#if defined(CONFIG_BVME6000_NET) || defined(CONFIG_BVME6000_NET_MODULE)
+#if IS_ENABLED(CONFIG_BVME6000_NET)
 #define ENABLE_BVME6000_NET
 #endif
 
index 4c9771d..ec4d0f3 100644 (file)
@@ -2750,7 +2750,7 @@ static int emac_probe(struct platform_device *ofdev)
        /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */
        dev->emac_irq = irq_of_parse_and_map(np, 0);
        dev->wol_irq = irq_of_parse_and_map(np, 1);
-       if (dev->emac_irq == NO_IRQ) {
+       if (!dev->emac_irq) {
                printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
                goto err_free;
        }
@@ -2913,9 +2913,9 @@ static int emac_probe(struct platform_device *ofdev)
  err_reg_unmap:
        iounmap(dev->emacp);
  err_irq_unmap:
-       if (dev->wol_irq != NO_IRQ)
+       if (dev->wol_irq)
                irq_dispose_mapping(dev->wol_irq);
-       if (dev->emac_irq != NO_IRQ)
+       if (dev->emac_irq)
                irq_dispose_mapping(dev->emac_irq);
  err_free:
        free_netdev(ndev);
@@ -2957,9 +2957,9 @@ static int emac_remove(struct platform_device *ofdev)
        emac_dbg_unregister(dev);
        iounmap(dev->emacp);
 
-       if (dev->wol_irq != NO_IRQ)
+       if (dev->wol_irq)
                irq_dispose_mapping(dev->wol_irq);
-       if (dev->emac_irq != NO_IRQ)
+       if (dev->emac_irq)
                irq_dispose_mapping(dev->emac_irq);
 
        free_netdev(dev->ndev);
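
These NO_IRQ conversions (here and in the other drivers above) are safe
because irq_of_parse_and_map() returns 0 when no mapping exists, and powerpc
defines NO_IRQ as 0; testing the value directly also works on architectures
that never provided NO_IRQ:

        unsigned int irq = irq_of_parse_and_map(np, 0);

        if (!irq)       /* 0 means "no mapping", formerly spelled NO_IRQ */
                return -EINVAL;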
index fdb5cdb..aaf6fec 100644 (file)
@@ -597,9 +597,8 @@ static int mal_probe(struct platform_device *ofdev)
                mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4);
        }
 
-       if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ ||
-           mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ ||
-           mal->rxde_irq == NO_IRQ) {
+       if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq ||
+           !mal->txde_irq  || !mal->rxde_irq) {
                printk(KERN_ERR
                       "mal%d: failed to map interrupts !\n", index);
                err = -ENODEV;
index 88f3c85..bfe17d9 100644 (file)
@@ -203,7 +203,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
        struct device *dev = &adapter->vdev->dev;
 
        dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
-       send_request_unmap(adapter, ltb->map_id);
+       if (!adapter->failover)
+               send_request_unmap(adapter, ltb->map_id);
 }
 
 static int alloc_rx_pool(struct ibmvnic_adapter *adapter,
@@ -522,7 +523,8 @@ static int ibmvnic_close(struct net_device *netdev)
        for (i = 0; i < adapter->req_rx_queues; i++)
                napi_disable(&adapter->napi[i]);
 
-       netif_tx_stop_all_queues(netdev);
+       if (!adapter->failover)
+               netif_tx_stop_all_queues(netdev);
 
        if (adapter->bounce_buffer) {
                if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
@@ -1422,7 +1424,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
                scrq = adapter->tx_scrq[i];
                scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
 
-               if (scrq->irq == NO_IRQ) {
+               if (!scrq->irq) {
                        rc = -EINVAL;
                        dev_err(dev, "Error mapping irq\n");
                        goto req_tx_irq_failed;
@@ -1442,7 +1444,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
        for (i = 0; i < adapter->req_rx_queues; i++) {
                scrq = adapter->rx_scrq[i];
                scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
-               if (scrq->irq == NO_IRQ) {
+               if (!scrq->irq) {
                        rc = -EINVAL;
                        dev_err(dev, "Error mapping irq\n");
                        goto req_rx_irq_failed;
@@ -2777,12 +2779,6 @@ static void handle_control_ras_rsp(union ibmvnic_crq *crq,
        }
 }
 
-static int ibmvnic_fw_comp_open(struct inode *inode, struct file *file)
-{
-       file->private_data = inode->i_private;
-       return 0;
-}
-
 static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
                          loff_t *ppos)
 {
@@ -2834,7 +2830,7 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
 
 static const struct file_operations trace_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = trace_read,
 };
 
@@ -2884,7 +2880,7 @@ static ssize_t paused_write(struct file *file, const char __user *user_buf,
 
 static const struct file_operations paused_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = paused_read,
        .write          = paused_write,
 };
@@ -2932,7 +2928,7 @@ static ssize_t tracing_write(struct file *file, const char __user *user_buf,
 
 static const struct file_operations tracing_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = tracing_read,
        .write          = tracing_write,
 };
@@ -2985,7 +2981,7 @@ static ssize_t error_level_write(struct file *file, const char __user *user_buf,
 
 static const struct file_operations error_level_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = error_level_read,
        .write          = error_level_write,
 };
@@ -3036,7 +3032,7 @@ static ssize_t trace_level_write(struct file *file, const char __user *user_buf,
 
 static const struct file_operations trace_level_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = trace_level_read,
        .write          = trace_level_write,
 };
@@ -3089,7 +3085,7 @@ static ssize_t trace_buff_size_write(struct file *file,
 
 static const struct file_operations trace_size_ops = {
        .owner          = THIS_MODULE,
-       .open           = ibmvnic_fw_comp_open,
+       .open           = simple_open,
        .read           = trace_buff_size_read,
        .write          = trace_buff_size_write,
 };
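
The removed ibmvnic_fw_comp_open() duplicated the generic simple_open()
helper, which in fs/libfs.c reads (as of this kernel):

        int simple_open(struct inode *inode, struct file *file)
        {
                if (inode->i_private)
                        file->private_data = inode->i_private;
                return 0;
        }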
@@ -3280,6 +3276,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                        rc = ibmvnic_send_crq_init(adapter);
                        if (rc)
                                dev_err(dev, "Error sending init rc=%ld\n", rc);
+               } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
+                       dev_info(dev, "Backing device failover detected\n");
+                       netif_carrier_off(netdev);
+                       adapter->failover = true;
                } else {
                        /* The adapter lost the connection */
                        dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
@@ -3615,8 +3615,18 @@ static void handle_crq_init_rsp(struct work_struct *work)
        struct device *dev = &adapter->vdev->dev;
        struct net_device *netdev = adapter->netdev;
        unsigned long timeout = msecs_to_jiffies(30000);
+       bool restart = false;
        int rc;
 
+       if (adapter->failover) {
+               release_sub_crqs(adapter);
+               if (netif_running(netdev)) {
+                       netif_tx_disable(netdev);
+                       ibmvnic_close(netdev);
+                       restart = true;
+               }
+       }
+
        send_version_xchg(adapter);
        reinit_completion(&adapter->init_done);
        if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -3645,6 +3655,17 @@ static void handle_crq_init_rsp(struct work_struct *work)
 
        netdev->real_num_tx_queues = adapter->req_tx_queues;
 
+       if (adapter->failover) {
+               adapter->failover = false;
+               if (restart) {
+                       rc = ibmvnic_open(netdev);
+                       if (rc)
+                               goto restart_failed;
+               }
+               netif_carrier_on(netdev);
+               return;
+       }
+
        rc = register_netdev(netdev);
        if (rc) {
                dev_err(dev,
@@ -3655,6 +3676,8 @@ static void handle_crq_init_rsp(struct work_struct *work)
 
        return;
 
+restart_failed:
+       dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc);
 register_failed:
        release_sub_crqs(adapter);
 task_failed:
@@ -3692,6 +3715,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        dev_set_drvdata(&dev->dev, netdev);
        adapter->vdev = dev;
        adapter->netdev = netdev;
+       adapter->failover = false;
 
        ether_addr_copy(adapter->mac_addr, mac_addr_p);
        ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -3721,6 +3745,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        if (dma_mapping_error(&dev->dev, adapter->stats_token)) {
                if (!firmware_has_feature(FW_FEATURE_CMO))
                        dev_err(&dev->dev, "Couldn't map stats buffer\n");
+               rc = -ENOMEM;
                goto free_crq;
        }
 
index e82898f..bfc84c7 100644 (file)
@@ -830,6 +830,7 @@ enum ibmvfc_crq_format {
        IBMVNIC_CRQ_INIT                 = 0x01,
        IBMVNIC_CRQ_INIT_COMPLETE        = 0x02,
        IBMVNIC_PARTITION_MIGRATED       = 0x06,
+       IBMVNIC_DEVICE_FAILOVER          = 0x08,
 };
 
 struct ibmvnic_crq_queue {
@@ -1047,4 +1048,5 @@ struct ibmvnic_adapter {
        u8 map_id;
 
        struct work_struct vnic_crq_init;
+       bool failover;
 };
index c4cf08d..67ff01a 100644 (file)
@@ -240,9 +240,7 @@ struct fm10k_iov_data {
        struct fm10k_vf_info    vf_info[0];
 };
 
-#define fm10k_vxlan_port_for_each(vp, intfc) \
-       list_for_each_entry(vp, &(intfc)->vxlan_port, list)
-struct fm10k_vxlan_port {
+struct fm10k_udp_port {
        struct list_head        list;
        sa_family_t             sa_family;
        __be16                  port;
@@ -335,8 +333,9 @@ struct fm10k_intfc {
        u32 reta[FM10K_RETA_SIZE];
        u32 rssrk[FM10K_RSSRK_SIZE];
 
-       /* VXLAN port tracking information */
+       /* UDP encapsulation port tracking information */
        struct list_head vxlan_port;
+       struct list_head geneve_port;
 
 #ifdef CONFIG_DEBUG_FS
        struct dentry *dbg_intfc;
@@ -458,7 +457,7 @@ __be16 fm10k_tx_encap_offload(struct sk_buff *skb);
 netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
                                  struct fm10k_ring *tx_ring);
 void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
-u64 fm10k_get_tx_pending(struct fm10k_ring *ring);
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw);
 bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
 void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);
 
@@ -496,7 +495,6 @@ int fm10k_close(struct net_device *netdev);
 
 /* Ethtool */
 void fm10k_set_ethtool_ops(struct net_device *dev);
-u32 fm10k_get_reta_size(struct net_device *netdev);
 void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir);
 
 /* IOV */
index d6baaea..dd95ac4 100644 (file)
@@ -207,6 +207,9 @@ s32 fm10k_disable_queues_generic(struct fm10k_hw *hw, u16 q_cnt)
        /* clear tx_ready to prevent any false hits for reset */
        hw->mac.tx_ready = false;
 
+       if (FM10K_REMOVED(hw->hw_addr))
+               return 0;
+
        /* clear the enable bit for all rings */
        for (i = 0; i < q_cnt; i++) {
                reg = fm10k_read_reg(hw, FM10K_TXDCTL(i));
index 50f71e9..d51f9c7 100644 (file)
@@ -34,7 +34,7 @@ u32 fm10k_read_reg(struct fm10k_hw *hw, int reg);
 /* write operations, indexed using DWORDS */
 #define fm10k_write_reg(hw, reg, val) \
 do { \
-       u32 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \
+       u32 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
        if (!FM10K_REMOVED(hw_addr)) \
                writel((val), &hw_addr[(reg)]); \
 } while (0)
@@ -42,7 +42,7 @@ do { \
 /* Switch register write operations, index using DWORDS */
 #define fm10k_write_sw_reg(hw, reg, val) \
 do { \
-       u32 __iomem *sw_addr = ACCESS_ONCE((hw)->sw_addr); \
+       u32 __iomem *sw_addr = READ_ONCE((hw)->sw_addr); \
        if (!FM10K_REMOVED(sw_addr)) \
                writel((val), &sw_addr[(reg)]); \
 } while (0)
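
READ_ONCE() is the intended replacement for the deprecated ACCESS_ONCE(); it
forces a single load, so the compiler cannot re-read hw->hw_addr between the
FM10K_REMOVED() check and the MMIO access while another context clears the
pointer during surprise removal. The pattern in isolation:

        u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr);  /* one snapshot */

        if (!FM10K_REMOVED(hw_addr))
                writel(val, &hw_addr[reg]);             /* uses the snapshot */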
index c04cbe9..adb7cb4 100644 (file)
@@ -966,7 +966,7 @@ static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags)
        return 0;
 }
 
-u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
+static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
 {
        return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG;
 }
index 47f0743..d9dec81 100644 (file)
@@ -51,7 +51,7 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface)
        int i;
 
        /* if there is no iov_data then there is no mailbox to process */
-       if (!ACCESS_ONCE(interface->iov_data))
+       if (!READ_ONCE(interface->iov_data))
                return 0;
 
        rcu_read_lock();
@@ -99,7 +99,7 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
        int i;
 
        /* if there is no iov_data then there is no mailbox to process */
-       if (!ACCESS_ONCE(interface->iov_data))
+       if (!READ_ONCE(interface->iov_data))
                return 0;
 
        rcu_read_lock();
index e9767b6..0d39103 100644 (file)
@@ -56,7 +56,7 @@ static int __init fm10k_init_module(void)
        pr_info("%s\n", fm10k_copyright);
 
        /* create driver workqueue */
-       fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0);
+       fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, fm10k_driver_name);
 
        fm10k_dbg_init();
 
@@ -651,11 +651,11 @@ static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
 static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
 {
        struct fm10k_intfc *interface = netdev_priv(skb->dev);
-       struct fm10k_vxlan_port *vxlan_port;
+       struct fm10k_udp_port *vxlan_port;
 
        /* we can only offload a vxlan if we recognize it as such */
        vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-                                             struct fm10k_vxlan_port, list);
+                                             struct fm10k_udp_port, list);
 
        if (!vxlan_port)
                return NULL;
@@ -1128,13 +1128,24 @@ static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
        return ring->stats.packets;
 }
 
-u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
+/**
+ * fm10k_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring structure
+ * @in_sw: is tx_pending being checked in SW or in HW?
+ */
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
 {
        struct fm10k_intfc *interface = ring->q_vector->interface;
        struct fm10k_hw *hw = &interface->hw;
+       u32 head, tail;
 
-       u32 head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
-       u32 tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
+       if (likely(in_sw)) {
+               head = ring->next_to_clean;
+               tail = ring->next_to_use;
+       } else {
+               head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
+               tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
+       }
 
        return ((head <= tail) ? tail : tail + ring->count) - head;
 }
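
A quick worked example of the wrap-around arithmetic: on a 256-descriptor
ring with head (next_to_clean) = 250 and tail (next_to_use) = 10, head >
tail, so pending = (10 + 256) - 250 = 16; without wrap, head = 10 and
tail = 26 gives 26 - 10 = 16 directly.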
@@ -1143,7 +1154,7 @@ bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
 {
        u32 tx_done = fm10k_get_tx_completed(tx_ring);
        u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
-       u32 tx_pending = fm10k_get_tx_pending(tx_ring);
+       u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);
 
        clear_check_for_tx_hang(tx_ring);
 
@@ -1397,7 +1408,7 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
         * that the calculation will never get below a 1. The bit shift
         * accounts for changes in the ITR due to PCIe link speed.
         */
-       itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8;
+       itr_round = READ_ONCE(ring_container->itr_scale) + 8;
        avg_wire_size += BIT(itr_round) - 1;
        avg_wire_size >>= itr_round;
 
@@ -1473,7 +1484,7 @@ static int fm10k_poll(struct napi_struct *napi, int budget)
        /* re-enable the q_vector */
        fm10k_qv_enable(q_vector);
 
-       return 0;
+       return min(work_done, budget - 1);
 }
 
 /**
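
A NAPI poll routine that has re-enabled its interrupts must return strictly
less than budget so the core treats the poll as complete; min(work_done,
budget - 1) guarantees that (with budget 64 and work_done 64 it returns 63)
while still reporting the real work count in the common case, instead of the
unconditional 0 returned before.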
index 20a5bbe..0562938 100644 (file)
@@ -384,129 +384,171 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface)
 }
 
 /**
- * fm10k_del_vxlan_port_all
+ * fm10k_free_udp_port_info
  * @interface: board private structure
  *
- * This function frees the entire vxlan_port list
+ * This function frees both geneve_port and vxlan_port structures
  **/
-static void fm10k_del_vxlan_port_all(struct fm10k_intfc *interface)
+static void fm10k_free_udp_port_info(struct fm10k_intfc *interface)
 {
-       struct fm10k_vxlan_port *vxlan_port;
-
-       /* flush all entries from list */
-       vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-                                             struct fm10k_vxlan_port, list);
-       while (vxlan_port) {
-               list_del(&vxlan_port->list);
-               kfree(vxlan_port);
-               vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-                                                     struct fm10k_vxlan_port,
-                                                     list);
+       struct fm10k_udp_port *port;
+
+       /* flush all entries from vxlan list */
+       port = list_first_entry_or_null(&interface->vxlan_port,
+                                       struct fm10k_udp_port, list);
+       while (port) {
+               list_del(&port->list);
+               kfree(port);
+               port = list_first_entry_or_null(&interface->vxlan_port,
+                                               struct fm10k_udp_port,
+                                               list);
+       }
+
+       /* flush all entries from geneve list */
+       port = list_first_entry_or_null(&interface->geneve_port,
+                                       struct fm10k_udp_port, list);
+       while (port) {
+               list_del(&port->list);
+               kfree(port);
+               port = list_first_entry_or_null(&interface->vxlan_port,
+                                               struct fm10k_udp_port,
+                                               list);
        }
 }
 
 /**
- * fm10k_restore_vxlan_port
+ * fm10k_restore_udp_port_info
  * @interface: board private structure
  *
- * This function restores the value in the tunnel_cfg register after reset
+ * This function restores the value in the tunnel_cfg register(s) after reset
  **/
-static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface)
+static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface)
 {
        struct fm10k_hw *hw = &interface->hw;
-       struct fm10k_vxlan_port *vxlan_port;
+       struct fm10k_udp_port *port;
 
        /* only the PF supports configuring tunnels */
        if (hw->mac.type != fm10k_mac_pf)
                return;
 
-       vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-                                             struct fm10k_vxlan_port, list);
+       port = list_first_entry_or_null(&interface->vxlan_port,
+                                       struct fm10k_udp_port, list);
 
        /* restore tunnel configuration register */
        fm10k_write_reg(hw, FM10K_TUNNEL_CFG,
-                       (vxlan_port ? ntohs(vxlan_port->port) : 0) |
+                       (port ? ntohs(port->port) : 0) |
                        (ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT));
+
+       port = list_first_entry_or_null(&interface->geneve_port,
+                                       struct fm10k_udp_port, list);
+
+       /* restore Geneve tunnel configuration register */
+       fm10k_write_reg(hw, FM10K_TUNNEL_CFG_GENEVE,
+                       (port ? ntohs(port->port) : 0));
+}
+
+static struct fm10k_udp_port *
+fm10k_remove_tunnel_port(struct list_head *ports,
+                        struct udp_tunnel_info *ti)
+{
+       struct fm10k_udp_port *port;
+
+       list_for_each_entry(port, ports, list) {
+               if ((port->port == ti->port) &&
+                   (port->sa_family == ti->sa_family)) {
+                       list_del(&port->list);
+                       return port;
+               }
+       }
+
+       return NULL;
+}
+
+static void fm10k_insert_tunnel_port(struct list_head *ports,
+                                    struct udp_tunnel_info *ti)
+{
+       struct fm10k_udp_port *port;
+
+       /* remove existing port entry from the list so that the newest items
+        * are always at the tail of the list.
+        */
+       port = fm10k_remove_tunnel_port(ports, ti);
+       if (!port) {
+               port = kmalloc(sizeof(*port), GFP_ATOMIC);
+               if (!port)
+                       return;
+               port->port = ti->port;
+               port->sa_family = ti->sa_family;
+       }
+
+       list_add_tail(&port->list, ports);
 }
 
 /**
- * fm10k_add_vxlan_port
+ * fm10k_udp_tunnel_add
  * @netdev: network interface device structure
  * @ti: Tunnel endpoint information
  *
- * This function is called when a new VXLAN interface has added a new port
- * number to the range that is currently in use for VXLAN.  The new port
- * number is always added to the tail so that the port number list should
- * match the order in which the ports were allocated.  The head of the list
- * is always used as the VXLAN port number for offloads.
+ * This function is called when a new UDP tunnel port has been added.
+ * Due to hardware restrictions, only one port per type can be offloaded at
+ * once.
  **/
-static void fm10k_add_vxlan_port(struct net_device *dev,
+static void fm10k_udp_tunnel_add(struct net_device *dev,
                                 struct udp_tunnel_info *ti)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
-       struct fm10k_vxlan_port *vxlan_port;
 
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-               return;
        /* only the PF supports configuring tunnels */
        if (interface->hw.mac.type != fm10k_mac_pf)
                return;
 
-       /* existing ports are pulled out so our new entry is always last */
-       fm10k_vxlan_port_for_each(vxlan_port, interface) {
-               if ((vxlan_port->port == ti->port) &&
-                   (vxlan_port->sa_family == ti->sa_family)) {
-                       list_del(&vxlan_port->list);
-                       goto insert_tail;
-               }
-       }
-
-       /* allocate memory to track ports */
-       vxlan_port = kmalloc(sizeof(*vxlan_port), GFP_ATOMIC);
-       if (!vxlan_port)
+       switch (ti->type) {
+       case UDP_TUNNEL_TYPE_VXLAN:
+               fm10k_insert_tunnel_port(&interface->vxlan_port, ti);
+               break;
+       case UDP_TUNNEL_TYPE_GENEVE:
+               fm10k_insert_tunnel_port(&interface->geneve_port, ti);
+               break;
+       default:
                return;
-       vxlan_port->port = ti->port;
-       vxlan_port->sa_family = ti->sa_family;
-
-insert_tail:
-       /* add new port value to list */
-       list_add_tail(&vxlan_port->list, &interface->vxlan_port);
+       }
 
-       fm10k_restore_vxlan_port(interface);
+       fm10k_restore_udp_port_info(interface);
 }
 
 /**
- * fm10k_del_vxlan_port
+ * fm10k_udp_tunnel_del
  * @netdev: network interface device structure
  * @ti: Tunnel endpoint information
  *
- * This function is called when a new VXLAN interface has freed a port
- * number from the range that is currently in use for VXLAN.  The freed
- * port is removed from the list and the new head is used to determine
- * the port number for offloads.
+ * This function is called when a UDP tunnel port is deleted. The freed
+ * port will be removed from the list, then we reprogram the offloaded port
+ * based on the head of the list.
  **/
-static void fm10k_del_vxlan_port(struct net_device *dev,
+static void fm10k_udp_tunnel_del(struct net_device *dev,
                                 struct udp_tunnel_info *ti)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
-       struct fm10k_vxlan_port *vxlan_port;
+       struct fm10k_udp_port *port = NULL;
 
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-               return;
        if (interface->hw.mac.type != fm10k_mac_pf)
                return;
 
-       /* find the port in the list and free it */
-       fm10k_vxlan_port_for_each(vxlan_port, interface) {
-               if ((vxlan_port->port == ti->port) &&
-                   (vxlan_port->sa_family == ti->sa_family)) {
-                       list_del(&vxlan_port->list);
-                       kfree(vxlan_port);
-                       break;
-               }
+       switch (ti->type) {
+       case UDP_TUNNEL_TYPE_VXLAN:
+               port = fm10k_remove_tunnel_port(&interface->vxlan_port, ti);
+               break;
+       case UDP_TUNNEL_TYPE_GENEVE:
+               port = fm10k_remove_tunnel_port(&interface->geneve_port, ti);
+               break;
+       default:
+               return;
        }
 
-       fm10k_restore_vxlan_port(interface);
+       /* if we did remove a port we need to free its memory */
+       kfree(port);
+
+       fm10k_restore_udp_port_info(interface);
 }
 
 /**
@@ -555,7 +597,6 @@ int fm10k_open(struct net_device *netdev)
        if (err)
                goto err_set_queues;
 
-       /* update VXLAN port configuration */
        udp_tunnel_get_rx_info(netdev);
 
        fm10k_up(interface);
@@ -591,7 +632,7 @@ int fm10k_close(struct net_device *netdev)
 
        fm10k_qv_free_irq(interface);
 
-       fm10k_del_vxlan_port_all(interface);
+       fm10k_free_udp_port_info(interface);
 
        fm10k_free_all_tx_resources(interface);
        fm10k_free_all_rx_resources(interface);
@@ -1055,7 +1096,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
        interface->xcast_mode = xcast_mode;
 
        /* Restore tunnel configuration */
-       fm10k_restore_vxlan_port(interface);
+       fm10k_restore_udp_port_info(interface);
 }
 
 void fm10k_reset_rx_state(struct fm10k_intfc *interface)
@@ -1098,7 +1139,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev,
        rcu_read_lock();
 
        for (i = 0; i < interface->num_rx_queues; i++) {
-               ring = ACCESS_ONCE(interface->rx_ring[i]);
+               ring = READ_ONCE(interface->rx_ring[i]);
 
                if (!ring)
                        continue;
@@ -1114,7 +1155,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev,
        }
 
        for (i = 0; i < interface->num_tx_queues; i++) {
-               ring = ACCESS_ONCE(interface->tx_ring[i]);
+               ring = READ_ONCE(interface->tx_ring[i]);
 
                if (!ring)
                        continue;
@@ -1299,7 +1340,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
 static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
-       struct fm10k_l2_accel *l2_accel = ACCESS_ONCE(interface->l2_accel);
+       struct fm10k_l2_accel *l2_accel = READ_ONCE(interface->l2_accel);
        struct fm10k_dglort_cfg dglort = { 0 };
        struct fm10k_hw *hw = &interface->hw;
        struct net_device *sdev = priv;
@@ -1375,8 +1416,8 @@ static const struct net_device_ops fm10k_netdev_ops = {
        .ndo_set_vf_vlan        = fm10k_ndo_set_vf_vlan,
        .ndo_set_vf_rate        = fm10k_ndo_set_vf_bw,
        .ndo_get_vf_config      = fm10k_ndo_get_vf_config,
-       .ndo_udp_tunnel_add     = fm10k_add_vxlan_port,
-       .ndo_udp_tunnel_del     = fm10k_del_vxlan_port,
+       .ndo_udp_tunnel_add     = fm10k_udp_tunnel_add,
+       .ndo_udp_tunnel_del     = fm10k_udp_tunnel_del,
        .ndo_dfwd_add_station   = fm10k_dfwd_add_station,
        .ndo_dfwd_del_station   = fm10k_dfwd_del_station,
 #ifdef CONFIG_NET_POLL_CONTROLLER
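
For context, the core hands these NDOs a struct udp_tunnel_info describing
the endpoint; a hypothetical direct invocation registering the IANA-assigned
Geneve port would look like:

        struct udp_tunnel_info ti = {
                .type           = UDP_TUNNEL_TYPE_GENEVE,
                .sa_family      = AF_INET,
                .port           = htons(6081), /* IANA Geneve port */
        };

        fm10k_udp_tunnel_add(netdev, &ti);      /* netdev: an fm10k device */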
index 774a565..b1a2f84 100644 (file)
@@ -62,7 +62,7 @@ u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg)
 
 u32 fm10k_read_reg(struct fm10k_hw *hw, int reg)
 {
-       u32 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
+       u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
        u32 value = 0;
 
        if (FM10K_REMOVED(hw_addr))
@@ -133,7 +133,7 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface)
        /* check the real address space to see if we've recovered */
        hw_addr = READ_ONCE(interface->uc_addr);
        value = readl(hw_addr);
-       if ((~value)) {
+       if (~value) {
                interface->hw.hw_addr = interface->uc_addr;
                netif_device_attach(netdev);
                interface->flags |= FM10K_FLAG_RESET_REQUESTED;
@@ -734,15 +734,15 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
        u64 rdba = ring->dma;
        struct fm10k_hw *hw = &interface->hw;
        u32 size = ring->count * sizeof(union fm10k_rx_desc);
-       u32 rxqctl = FM10K_RXQCTL_ENABLE | FM10K_RXQCTL_PF;
-       u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+       u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
        u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN;
        u32 rxint = FM10K_INT_MAP_DISABLE;
        u8 rx_pause = interface->rx_pause;
        u8 reg_idx = ring->reg_idx;
 
        /* disable queue to avoid issues while updating state */
-       fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), 0);
+       rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+       rxqctl &= ~FM10K_RXQCTL_ENABLE;
+       fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
        fm10k_write_flush(hw);
 
        /* possible poll here to verify ring resources have been cleaned */
@@ -797,6 +797,8 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
        fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint);
 
        /* enable queue */
+       rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+       rxqctl |= FM10K_RXQCTL_ENABLE;
        fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
 
        /* place buffers on ring for receive data */
@@ -1699,7 +1701,7 @@ void fm10k_down(struct fm10k_intfc *interface)
 
                /* start checking at the last ring to have pending Tx */
                for (; i < interface->num_tx_queues; i++)
-                       if (fm10k_get_tx_pending(interface->tx_ring[i]))
+                       if (fm10k_get_tx_pending(interface->tx_ring[i], false))
                                break;
 
                /* if all the queues are drained, we can break now */
@@ -1835,8 +1837,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
        interface->tx_itr = FM10K_TX_ITR_DEFAULT;
        interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
 
-       /* initialize vxlan_port list */
+       /* initialize udp port lists */
        INIT_LIST_HEAD(&interface->vxlan_port);
+       INIT_LIST_HEAD(&interface->geneve_port);
 
        netdev_rss_key_fill(rss_key, sizeof(rss_key));
        memcpy(interface->rssrk, rss_key, sizeof(rss_key));
@@ -1950,9 +1953,18 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        struct fm10k_intfc *interface;
        int err;
 
+       if (pdev->error_state != pci_channel_io_normal) {
+               dev_err(&pdev->dev,
+                       "PCI device still in an error state. Unable to load...\n");
+               return -EIO;
+       }
+
        err = pci_enable_device_mem(pdev);
-       if (err)
+       if (err) {
+               dev_err(&pdev->dev,
+                       "PCI enable device failed: %d\n", err);
                return err;
+       }
 
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err)
@@ -2275,7 +2287,7 @@ static pci_ers_result_t fm10k_io_slot_reset(struct pci_dev *pdev)
 {
        pci_ers_result_t result;
 
-       if (pci_enable_device_mem(pdev)) {
+       if (pci_reenable_device(pdev)) {
                dev_err(&pdev->dev,
                        "Cannot re-enable PCI device after reset.\n");
                result = PCI_ERS_RESULT_DISCONNECT;
index 682299d..23fb319 100644 (file)
@@ -867,10 +867,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
        vf_q_idx = fm10k_vf_queue_index(hw, vf_idx);
        qmap_idx = qmap_stride * vf_idx;
 
-       /* MAP Tx queue back to 0 temporarily, and disable it */
-       fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
-       fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
-
        /* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is
         * used here to indicate to the VF that it will not have privilege to
         * write VLAN_TABLE. All policy is enforced on the PF but this allows
@@ -886,9 +882,35 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
        fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_DEFAULT_MAC,
                                    vf_info->mac, vf_vid);
 
-       /* load onto outgoing mailbox, ignore any errors on enqueue */
-       if (vf_info->mbx.ops.enqueue_tx)
-               vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+       /* Configure Queue control register with new VLAN ID. The TXQCTL
+        * register is RO from the VF, so the PF must do this even in the
+        * case of notifying the VF of a new VID via the mailbox.
+        */
+       txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
+                FM10K_TXQCTL_VID_MASK;
+       txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
+                 FM10K_TXQCTL_VF | vf_idx;
+
+       for (i = 0; i < queues_per_pool; i++)
+               fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
+
+       /* try loading a message onto outgoing mailbox first */
+       if (vf_info->mbx.ops.enqueue_tx) {
+               err = vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+               if (err != FM10K_MBX_ERR_NO_MBX)
+                       return err;
+               err = 0;
+       }
+
+       /* If we aren't connected to a mailbox, this is most likely because
+        * the VF driver is not running. It should thus be safe to re-map
+        * queues and use the registers to pass the MAC address so that the VF
+        * driver gets correct information during its initialization.
+        */
+
+       /* MAP Tx queue back to 0 temporarily, and disable it */
+       fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
+       fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
 
        /* verify ring has disabled before modifying base address registers */
        txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx));
@@ -927,16 +949,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
                                                   FM10K_TDLEN_ITR_SCALE_SHIFT);
 
 err_out:
-       /* configure Queue control register */
-       txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
-                FM10K_TXQCTL_VID_MASK;
-       txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
-                 FM10K_TXQCTL_VF | vf_idx;
-
-       /* assign VLAN ID */
-       for (i = 0; i < queues_per_pool; i++)
-               fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
-
        /* restore the queue back to VF ownership */
        fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx);
        return err;
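The VID packing above is a plain shift-and-mask; with the FIELD_PREP() helper from <linux/bitfield.h> it could be expressed more compactly. A sketch, assuming FM10K_TXQCTL_VID_MASK is a contiguous mask whose low bit sits at FM10K_TXQCTL_VID_SHIFT:

        /* sketch: equivalent VLAN ID packing with FIELD_PREP()
         * (requires #include <linux/bitfield.h>)
         */
        txqctl  = FIELD_PREP(FM10K_TXQCTL_VID_MASK, vf_vid);
        txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
                  FM10K_TXQCTL_VF | vf_idx;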
index f4e75c4..6bb16c1 100644 (file)
@@ -154,6 +154,7 @@ struct fm10k_hw;
 #define FM10K_DGLORTDEC_INNERRSS_ENABLE                0x08000000
 #define FM10K_TUNNEL_CFG       0x0040
 #define FM10K_TUNNEL_CFG_NVGRE_SHIFT           16
+#define FM10K_TUNNEL_CFG_GENEVE        0x0041
 #define FM10K_SWPRI_MAP(_n)    ((_n) + 0x0050)
 #define FM10K_SWPRI_MAX                16
 #define FM10K_RSSRK(_n, _m)    (((_n) * 0x10) + (_m) + 0x0800)
index 2a88291..19103a6 100644 (file)
 #include "i40e_dcb.h"
 
 /* Useful i40e defaults */
-#define I40E_MAX_VEB          16
-
-#define I40E_MAX_NUM_DESCRIPTORS      4096
-#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024)
-#define I40E_DEFAULT_NUM_DESCRIPTORS  512
-#define I40E_REQ_DESCRIPTOR_MULTIPLE  32
-#define I40E_MIN_NUM_DESCRIPTORS      64
-#define I40E_MIN_MSIX                 2
-#define I40E_DEFAULT_NUM_VMDQ_VSI     8 /* max 256 VSIs */
-#define I40E_MIN_VSI_ALLOC            51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */
+#define I40E_MAX_VEB                   16
+
+#define I40E_MAX_NUM_DESCRIPTORS       4096
+#define I40E_MAX_CSR_SPACE             (4 * 1024 * 1024 - 64 * 1024)
+#define I40E_DEFAULT_NUM_DESCRIPTORS   512
+#define I40E_REQ_DESCRIPTOR_MULTIPLE   32
+#define I40E_MIN_NUM_DESCRIPTORS       64
+#define I40E_MIN_MSIX                  2
+#define I40E_DEFAULT_NUM_VMDQ_VSI      8 /* max 256 VSIs */
+#define I40E_MIN_VSI_ALLOC             51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */
 /* max 16 qps */
 #define i40e_default_queues_per_vmdq(pf) \
                (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1)
-#define I40E_DEFAULT_QUEUES_PER_VF    4
-#define I40E_DEFAULT_QUEUES_PER_TC    1 /* should be a power of 2 */
+#define I40E_DEFAULT_QUEUES_PER_VF     4
+#define I40E_DEFAULT_QUEUES_PER_TC     1 /* should be a power of 2 */
 #define i40e_pf_get_max_q_per_tc(pf) \
                (((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64)
-#define I40E_FDIR_RING                0
-#define I40E_FDIR_RING_COUNT          32
+#define I40E_FDIR_RING                 0
+#define I40E_FDIR_RING_COUNT           32
 #ifdef I40E_FCOE
-#define I40E_DEFAULT_FCOE             8 /* default number of QPs for FCoE */
-#define I40E_MINIMUM_FCOE             1 /* minimum number of QPs for FCoE */
+#define I40E_DEFAULT_FCOE              8 /* default number of QPs for FCoE */
+#define I40E_MINIMUM_FCOE              1 /* minimum number of QPs for FCoE */
 #endif /* I40E_FCOE */
-#define I40E_MAX_AQ_BUF_SIZE          4096
-#define I40E_AQ_LEN                   256
-#define I40E_AQ_WORK_LIMIT            66 /* max number of VFs + a little */
-#define I40E_MAX_USER_PRIORITY        8
-#define I40E_DEFAULT_MSG_ENABLE       4
-#define I40E_QUEUE_WAIT_RETRY_LIMIT   10
-#define I40E_INT_NAME_STR_LEN        (IFNAMSIZ + 16)
+#define I40E_MAX_AQ_BUF_SIZE           4096
+#define I40E_AQ_LEN                    256
+#define I40E_AQ_WORK_LIMIT             66 /* max number of VFs + a little */
+#define I40E_MAX_USER_PRIORITY         8
+#define I40E_DEFAULT_MSG_ENABLE                4
+#define I40E_QUEUE_WAIT_RETRY_LIMIT    10
+#define I40E_INT_NAME_STR_LEN          (IFNAMSIZ + 16)
 
 /* Ethtool Private Flags */
-#define        I40E_PRIV_FLAGS_MFP_FLAG                BIT(0)
-#define        I40E_PRIV_FLAGS_LINKPOLL_FLAG           BIT(1)
+#define I40E_PRIV_FLAGS_MFP_FLAG               BIT(0)
+#define I40E_PRIV_FLAGS_LINKPOLL_FLAG          BIT(1)
 #define I40E_PRIV_FLAGS_FD_ATR                 BIT(2)
 #define I40E_PRIV_FLAGS_VEB_STATS              BIT(3)
 #define I40E_PRIV_FLAGS_HW_ATR_EVICT           BIT(4)
 #define I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT   BIT(5)
 
-#define I40E_NVM_VERSION_LO_SHIFT  0
-#define I40E_NVM_VERSION_LO_MASK   (0xff << I40E_NVM_VERSION_LO_SHIFT)
-#define I40E_NVM_VERSION_HI_SHIFT  12
-#define I40E_NVM_VERSION_HI_MASK   (0xf << I40E_NVM_VERSION_HI_SHIFT)
-#define I40E_OEM_VER_BUILD_MASK    0xffff
-#define I40E_OEM_VER_PATCH_MASK    0xff
-#define I40E_OEM_VER_BUILD_SHIFT   8
-#define I40E_OEM_VER_SHIFT         24
+#define I40E_NVM_VERSION_LO_SHIFT      0
+#define I40E_NVM_VERSION_LO_MASK       (0xff << I40E_NVM_VERSION_LO_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT      12
+#define I40E_NVM_VERSION_HI_MASK       (0xf << I40E_NVM_VERSION_HI_SHIFT)
+#define I40E_OEM_VER_BUILD_MASK                0xffff
+#define I40E_OEM_VER_PATCH_MASK                0xff
+#define I40E_OEM_VER_BUILD_SHIFT       8
+#define I40E_OEM_VER_SHIFT             24
 #define I40E_PHY_DEBUG_ALL \
        (I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \
        I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW)
 
 /* The values in here are decimal coded as hex as is the case in the NVM map*/
-#define I40E_CURRENT_NVM_VERSION_HI 0x2
-#define I40E_CURRENT_NVM_VERSION_LO 0x40
+#define I40E_CURRENT_NVM_VERSION_HI    0x2
+#define I40E_CURRENT_NVM_VERSION_LO    0x40
 
-/* magic for getting defines into strings */
-#define STRINGIFY(foo)  #foo
-#define XSTRINGIFY(bar) STRINGIFY(bar)
-
-#define I40E_RX_DESC(R, i)                     \
+#define I40E_RX_DESC(R, i)     \
        (&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
-#define I40E_TX_DESC(R, i)                     \
+#define I40E_TX_DESC(R, i)     \
        (&(((struct i40e_tx_desc *)((R)->desc))[i]))
-#define I40E_TX_CTXTDESC(R, i)                 \
+#define I40E_TX_CTXTDESC(R, i) \
        (&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
-#define I40E_TX_FDIRDESC(R, i)                 \
+#define I40E_TX_FDIRDESC(R, i) \
        (&(((struct i40e_filter_program_desc *)((R)->desc))[i]))
 
 /* default to trying for four seconds */
-#define I40E_TRY_LINK_TIMEOUT (4 * HZ)
+#define I40E_TRY_LINK_TIMEOUT  (4 * HZ)
 
 /**
  * i40e_is_mac_710 - Return true if MAC is X710/XL710
@@ -199,9 +195,9 @@ struct i40e_lump_tracking {
 #define I40E_FDIR_BUFFER_HEAD_ROOM     32
 #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4)
 
-#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
-#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
-#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
+#define I40E_HKEY_ARRAY_SIZE   ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
+#define I40E_HLUT_ARRAY_SIZE   ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40E_VF_HLUT_ARRAY_SIZE        ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
 
 enum i40e_fd_stat_idx {
        I40E_FD_STAT_ATR,
@@ -387,8 +383,8 @@ struct i40e_pf {
        struct mutex switch_mutex;
        u16 lan_vsi;       /* our default LAN VSI */
        u16 lan_veb;       /* initial relay, if exists */
-#define I40E_NO_VEB   0xffff
-#define I40E_NO_VSI   0xffff
+#define I40E_NO_VEB    0xffff
+#define I40E_NO_VSI    0xffff
        u16 next_vsi;      /* Next unallocated VSI - 0-based! */
        struct i40e_vsi **vsi;
        struct i40e_veb *veb[I40E_MAX_VEB];
@@ -423,8 +419,8 @@ struct i40e_pf {
         */
        u16 dcbx_cap;
 
-       u32     fcoe_hmc_filt_num;
-       u32     fcoe_hmc_cntx_num;
+       u32 fcoe_hmc_filt_num;
+       u32 fcoe_hmc_cntx_num;
        struct i40e_filter_control_settings filter_settings;
 
        struct ptp_clock *ptp_clock;
@@ -470,10 +466,10 @@ struct i40e_mac_filter {
 struct i40e_veb {
        struct i40e_pf *pf;
        u16 idx;
-       u16 veb_idx;           /* index of VEB parent */
+       u16 veb_idx;            /* index of VEB parent */
        u16 seid;
        u16 uplink_seid;
-       u16 stats_idx;           /* index of VEB parent */
+       u16 stats_idx;          /* index of VEB parent */
        u8  enabled_tc;
        u16 bridge_mode;        /* Bridge Mode (VEB/VEPA) */
        u16 flags;
@@ -534,12 +530,13 @@ struct i40e_vsi {
        u32  promisc_threshold;
 
        u16 work_limit;
-       u16 int_rate_limit;  /* value in usecs */
+       u16 int_rate_limit;     /* value in usecs */
+
+       u16 rss_table_size;     /* HW RSS table size */
+       u16 rss_size;           /* Allocated RSS queues */
+       u8  *rss_hkey_user;     /* User configured hash keys */
+       u8  *rss_lut_user;      /* User configured lookup table entries */
 
-       u16 rss_table_size; /* HW RSS table size */
-       u16 rss_size;       /* Allocated RSS queues */
-       u8  *rss_hkey_user; /* User configured hash keys */
-       u8  *rss_lut_user;  /* User configured lookup table entries */
 
        u16 max_frame;
        u16 rx_buf_len;
@@ -550,14 +547,14 @@ struct i40e_vsi {
        int base_vector;
        bool irqs_ready;
 
-       u16 seid;            /* HW index of this VSI (absolute index) */
-       u16 id;              /* VSI number */
+       u16 seid;               /* HW index of this VSI (absolute index) */
+       u16 id;                 /* VSI number */
        u16 uplink_seid;
 
-       u16 base_queue;      /* vsi's first queue in hw array */
-       u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */
-       u16 req_queue_pairs; /* User requested queue pairs */
-       u16 num_queue_pairs; /* Used tx and rx pairs */
+       u16 base_queue;         /* vsi's first queue in hw array */
+       u16 alloc_queue_pairs;  /* Allocated Tx/Rx queues */
+       u16 req_queue_pairs;    /* User requested queue pairs */
+       u16 num_queue_pairs;    /* Used tx and rx pairs */
        u16 num_desc;
        enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
        s16 vf_id;              /* Virtual function ID for SRIOV VSIs */
@@ -576,19 +573,16 @@ struct i40e_vsi {
        /* TC BW limit max quanta within VSI */
        u8  bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS];
 
-       struct i40e_pf *back;  /* Backreference to associated PF */
-       u16 idx;               /* index in pf->vsi[] */
-       u16 veb_idx;           /* index of VEB parent */
-       struct kobject *kobj;  /* sysfs object */
-       bool current_isup;     /* Sync 'link up' logging */
+       struct i40e_pf *back;   /* Backreference to associated PF */
+       u16 idx;                /* index in pf->vsi[] */
+       u16 veb_idx;            /* index of VEB parent */
+       struct kobject *kobj;   /* sysfs object */
+       bool current_isup;      /* Sync 'link up' logging */
 
        void *priv;     /* client driver data reference. */
 
        /* VSI specific handlers */
        irqreturn_t (*irq_handler)(int irq, void *data);
-
-       /* current rxnfc data */
-       struct ethtool_rxnfc rxnfc; /* current rss hash opts */
 } ____cacheline_internodealigned_in_smp;
 
 struct i40e_netdev_priv {
index 11cf1a5..67e396b 100644 (file)
@@ -204,6 +204,9 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_suspend_port_tx                            = 0x041B,
        i40e_aqc_opc_resume_port_tx                             = 0x041C,
        i40e_aqc_opc_configure_partition_bw                     = 0x041D,
+       /* hmc */
+       i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
+       i40e_aqc_opc_set_hmc_resource_profile   = 0x0501,
 
        /* phy commands*/
        i40e_aqc_opc_get_phy_abilities          = 0x0600,
@@ -450,13 +453,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
 /* Set ARP Proxy command / response (indirect 0x0104) */
 struct i40e_aqc_arp_proxy_data {
        __le16  command_flags;
-#define I40E_AQ_ARP_INIT_IPV4  0x0008
-#define I40E_AQ_ARP_UNSUP_CTL  0x0010
-#define I40E_AQ_ARP_ENA                0x0020
-#define I40E_AQ_ARP_ADD_IPV4   0x0040
-#define I40E_AQ_ARP_DEL_IPV4   0x0080
+#define I40E_AQ_ARP_INIT_IPV4  0x0800
+#define I40E_AQ_ARP_UNSUP_CTL  0x1000
+#define I40E_AQ_ARP_ENA                0x2000
+#define I40E_AQ_ARP_ADD_IPV4   0x4000
+#define I40E_AQ_ARP_DEL_IPV4   0x8000
        __le16  table_id;
-       __le32  pfpm_proxyfc;
+       __le32  enabled_offloads;
+#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE    0x00000020
+#define I40E_AQ_ARP_OFFLOAD_ENABLE             0x00000800
        __le32  ip_addr;
        u8      mac_addr[6];
        u8      reserved[2];
@@ -471,17 +476,19 @@ struct i40e_aqc_ns_proxy_data {
        __le16  table_idx_ipv6_0;
        __le16  table_idx_ipv6_1;
        __le16  control;
-#define I40E_AQ_NS_PROXY_ADD_0         0x0100
-#define I40E_AQ_NS_PROXY_DEL_0         0x0200
-#define I40E_AQ_NS_PROXY_ADD_1         0x0400
-#define I40E_AQ_NS_PROXY_DEL_1         0x0800
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x1000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x2000
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x4000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x8000
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0001
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0004
+#define I40E_AQ_NS_PROXY_ADD_0         0x0001
+#define I40E_AQ_NS_PROXY_DEL_0         0x0002
+#define I40E_AQ_NS_PROXY_ADD_1         0x0004
+#define I40E_AQ_NS_PROXY_DEL_1         0x0008
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x0010
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x0020
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x0040
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x0080
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0100
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0400
+#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE        0x0800
+#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE       0x1000
        u8      mac_addr_0[6];
        u8      mac_addr_1[6];
        u8      local_mac_addr[6];
@@ -1582,6 +1589,24 @@ struct i40e_aqc_configure_partition_bw_data {
 
 I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
 
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+       u8      pm_profile;
+       u8      pe_vf_enabled;
+       u8      reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+       /* I40E_HMC_PROFILE_NO_CHANGE   = 0, reserved */
+       I40E_HMC_PROFILE_DEFAULT        = 1,
+       I40E_HMC_PROFILE_FAVOR_VF       = 2,
+       I40E_HMC_PROFILE_EQUAL          = 3,
+};
+
 /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
 
 /* set in param0 for get phy abilities to report qualified modules */
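I40E_CHECK_CMD_LENGTH() is a compile-time assertion that a direct admin-queue command payload is exactly 16 bytes, which the 1 + 1 + 14 byte layout above satisfies. Roughly the same invariant, as it could be stated with BUILD_BUG_ON() inside any function:

        /* a direct AQ command payload must fit the 16-byte descriptor field */
        BUILD_BUG_ON(sizeof(struct i40e_aq_get_set_hmc_resource_profile) != 16);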
index e1370c5..250db0b 100644 (file)
@@ -148,6 +148,11 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
                                        "Cannot locate client instance virtual channel receive routine\n");
                                continue;
                        }
+                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                     &cdev->state)) {
+                               dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort virtchnl_receive\n");
+                               continue;
+                       }
                        cdev->client->ops->virtchnl_receive(&cdev->lan_info,
                                                            cdev->client,
                                                            vf_id, msg, len);
@@ -181,6 +186,11 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
                                        "Cannot locate client instance l2_param_change routine\n");
                                continue;
                        }
+                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                     &cdev->state)) {
+                               dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n");
+                               continue;
+                       }
                        cdev->lan_info.params = params;
                        cdev->client->ops->l2_param_change(&cdev->lan_info,
                                                           cdev->client,
@@ -199,6 +209,7 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
 void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
 {
        struct i40e_client_instance *cdev;
+       int ret = 0;
 
        if (!vsi)
                return;
@@ -211,7 +222,14 @@ void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
                                        "Cannot locate client instance open routine\n");
                                continue;
                        }
-                       cdev->client->ops->open(&cdev->lan_info, cdev->client);
+                       if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                      &cdev->state))) {
+                               ret = cdev->client->ops->open(&cdev->lan_info,
+                                                             cdev->client);
+                               if (!ret)
+                                       set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                               &cdev->state);
+                       }
                }
        }
        mutex_unlock(&i40e_client_instance_mutex);
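The same opened-instance guard is repeated in each notifier touched here. A hypothetical predicate (not part of the patch) makes the pattern explicit:

        /* hypothetical helper: only invoke client ops on opened instances */
        static bool i40e_client_instance_opened(struct i40e_client_instance *cdev)
        {
                return test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
        }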
@@ -298,6 +316,11 @@ void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
                                        "Cannot locate client instance VF reset routine\n");
                                continue;
                        }
+                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                     &cdev->state)) {
+                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n");
+                               continue;
+                       }
                        cdev->client->ops->vf_reset(&cdev->lan_info,
                                                    cdev->client, vf_id);
                }
@@ -328,6 +351,11 @@ void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
                                        "Cannot locate client instance VF enable routine\n");
                                continue;
                        }
+                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                     &cdev->state)) {
+                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n");
+                               continue;
+                       }
                        cdev->client->ops->vf_enable(&cdev->lan_info,
                                                     cdev->client, num_vfs);
                }
@@ -362,6 +390,11 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
                                        "Cannot locate client instance VF capability routine\n");
                                continue;
                        }
+                       if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                     &cdev->state)) {
+                               dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-capable\n");
+                               continue;
+                       }
                        capable = cdev->client->ops->vf_capable(&cdev->lan_info,
                                                                cdev->client,
                                                                vf_id);
@@ -407,12 +440,14 @@ struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
  * @client: pointer to a client struct in the client list.
  * @existing: set to true if the client instance already existed
  *
- * Returns cdev ptr on success, NULL on failure
+ * Returns cdev ptr on success or if it already exists, NULL on failure
  **/
 static
 struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
-                                                     struct i40e_client *client)
+                                                    struct i40e_client *client,
+                                                    bool *existing)
 {
        struct i40e_client_instance *cdev;
        struct netdev_hw_addr *mac = NULL;
@@ -421,7 +456,7 @@ struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
        mutex_lock(&i40e_client_instance_mutex);
        list_for_each_entry(cdev, &i40e_client_instances, list) {
                if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
-                       cdev = NULL;
+                       *existing = true;
                        goto out;
                }
        }
@@ -505,6 +540,7 @@ void i40e_client_subtask(struct i40e_pf *pf)
 {
        struct i40e_client_instance *cdev;
        struct i40e_client *client;
+       bool existing = false;
        int ret = 0;
 
        if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
@@ -528,19 +564,27 @@ void i40e_client_subtask(struct i40e_pf *pf)
                        /* check if L2 VSI is up, if not we are not ready */
                        if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
                                continue;
+               } else {
+                       dev_warn(&pf->pdev->dev, "This client %s is being instantiated at probe\n",
+                                client->name);
                }
 
                /* Add the client instance to the instance list */
-               cdev = i40e_client_add_instance(pf, client);
+               cdev = i40e_client_add_instance(pf, client, &existing);
                if (!cdev)
                        continue;
 
-               /* Also up the ref_cnt of no. of instances of this client */
-               atomic_inc(&client->ref_cnt);
-               dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
-                        client->name, pf->hw.pf_id,
-                        pf->hw.bus.device, pf->hw.bus.func);
+               if (!existing) {
+                       /* Also up the ref_cnt for no. of instances of this
+                        * client.
+                        */
+                       atomic_inc(&client->ref_cnt);
+                       dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+                                client->name, pf->hw.pf_id,
+                                pf->hw.bus.device, pf->hw.bus.func);
+               }
 
+               mutex_lock(&i40e_client_instance_mutex);
                /* Send an Open request to the client */
                atomic_inc(&cdev->ref_cnt);
                if (client->ops && client->ops->open)
@@ -550,10 +594,12 @@ void i40e_client_subtask(struct i40e_pf *pf)
                        set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
                } else {
                        /* remove client instance */
+                       mutex_unlock(&i40e_client_instance_mutex);
                        i40e_client_del_instance(pf, client);
                        atomic_dec(&client->ref_cnt);
                        continue;
                }
+               mutex_unlock(&i40e_client_instance_mutex);
        }
        mutex_unlock(&i40e_client_mutex);
 }
@@ -588,7 +634,8 @@ int i40e_lan_add_device(struct i40e_pf *pf)
                 pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
 
        /* Since in some cases register may have happened before a device gets
-        * added, we can schedule a subtask to go initiate the clients.
+        * added, we can schedule a subtask to go initiate the clients if
+        * they can be launched at probe time.
         */
        pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
        i40e_service_event_schedule(pf);
@@ -635,7 +682,7 @@ int i40e_lan_del_device(struct i40e_pf *pf)
 static int i40e_client_release(struct i40e_client *client)
 {
        struct i40e_client_instance *cdev, *tmp;
-       struct i40e_pf *pf = NULL;
+       struct i40e_pf *pf;
        int ret = 0;
 
        LIST_HEAD(cdevs_tmp);
@@ -645,12 +692,12 @@ static int i40e_client_release(struct i40e_client *client)
                if (strncmp(cdev->client->name, client->name,
                            I40E_CLIENT_STR_LENGTH))
                        continue;
+               pf = (struct i40e_pf *)cdev->lan_info.pf;
                if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
                        if (atomic_read(&cdev->ref_cnt) > 0) {
                                ret = I40E_ERR_NOT_READY;
                                goto out;
                        }
-                       pf = (struct i40e_pf *)cdev->lan_info.pf;
                        if (client->ops && client->ops->close)
                                client->ops->close(&cdev->lan_info, client,
                                                   false);
@@ -662,8 +709,7 @@ static int i40e_client_release(struct i40e_client *client)
                                 client->name, pf->hw.pf_id);
                }
                /* delete the client instance from the list */
-               list_del(&cdev->list);
-               list_add(&cdev->list, &cdevs_tmp);
+               list_move(&cdev->list, &cdevs_tmp);
                atomic_dec(&client->ref_cnt);
                dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
                         client->name);
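list_move() from <linux/list.h> is the canonical form of the del/add pair it replaces; under the hood it performs exactly those two steps:

        /* list_move(&cdev->list, &cdevs_tmp) is equivalent to: */
        list_del(&cdev->list);
        list_add(&cdev->list, &cdevs_tmp);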
@@ -792,7 +838,8 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev,
                        wr32(hw, I40E_PFINT_AEQCTL, reg);
                }
        }
-
+       /* Mitigate sync problems with iwarp VF driver */
+       i40e_flush(hw);
        return 0;
 err:
        kfree(ldev->qvlist_info);
@@ -990,7 +1037,6 @@ int i40e_unregister_client(struct i40e_client *client)
        if (!i40e_client_is_registered(client)) {
                pr_info("i40e: Client %s has not been registered\n",
                        client->name);
-               mutex_unlock(&i40e_client_mutex);
                ret = -ENODEV;
                goto out;
        }
index a4601d9..38a6c36 100644 (file)
@@ -36,9 +36,9 @@
 #define I40E_CLIENT_VERSION_MINOR 01
 #define I40E_CLIENT_VERSION_BUILD 00
 #define I40E_CLIENT_VERSION_STR     \
-       XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \
-       XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \
-       XSTRINGIFY(I40E_CLIENT_VERSION_BUILD)
+       __stringify(I40E_CLIENT_VERSION_MAJOR) "." \
+       __stringify(I40E_CLIENT_VERSION_MINOR) "." \
+       __stringify(I40E_CLIENT_VERSION_BUILD)
 
 struct i40e_client_version {
        u8 major;
index c912e04..1835186 100644 (file)
@@ -1560,13 +1560,13 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
                }
 #endif
                for (i = 0; i < vsi->num_queue_pairs; i++) {
-                       snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
+                       snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i);
                        p += ETH_GSTRING_LEN;
-                       snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
+                       snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i);
                        p += ETH_GSTRING_LEN;
-                       snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
+                       snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_packets", i);
                        p += ETH_GSTRING_LEN;
-                       snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
+                       snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_bytes", i);
                        p += ETH_GSTRING_LEN;
                }
                if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
@@ -1581,16 +1581,16 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
                        }
                        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
                                snprintf(p, ETH_GSTRING_LEN,
-                                        "veb.tc_%u_tx_packets", i);
+                                        "veb.tc_%d_tx_packets", i);
                                p += ETH_GSTRING_LEN;
                                snprintf(p, ETH_GSTRING_LEN,
-                                        "veb.tc_%u_tx_bytes", i);
+                                        "veb.tc_%d_tx_bytes", i);
                                p += ETH_GSTRING_LEN;
                                snprintf(p, ETH_GSTRING_LEN,
-                                        "veb.tc_%u_rx_packets", i);
+                                        "veb.tc_%d_rx_packets", i);
                                p += ETH_GSTRING_LEN;
                                snprintf(p, ETH_GSTRING_LEN,
-                                        "veb.tc_%u_rx_bytes", i);
+                                        "veb.tc_%d_rx_bytes", i);
                                p += ETH_GSTRING_LEN;
                        }
                }
@@ -1601,23 +1601,23 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
                }
                for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                        snprintf(p, ETH_GSTRING_LEN,
-                                "port.tx_priority_%u_xon", i);
+                                "port.tx_priority_%d_xon", i);
                        p += ETH_GSTRING_LEN;
                        snprintf(p, ETH_GSTRING_LEN,
-                                "port.tx_priority_%u_xoff", i);
+                                "port.tx_priority_%d_xoff", i);
                        p += ETH_GSTRING_LEN;
                }
                for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                        snprintf(p, ETH_GSTRING_LEN,
-                                "port.rx_priority_%u_xon", i);
+                                "port.rx_priority_%d_xon", i);
                        p += ETH_GSTRING_LEN;
                        snprintf(p, ETH_GSTRING_LEN,
-                                "port.rx_priority_%u_xoff", i);
+                                "port.rx_priority_%d_xoff", i);
                        p += ETH_GSTRING_LEN;
                }
                for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
                        snprintf(p, ETH_GSTRING_LEN,
-                                "port.rx_priority_%u_xon_2_xoff", i);
+                                "port.rx_priority_%d_xon_2_xoff", i);
                        p += ETH_GSTRING_LEN;
                }
                /* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
@@ -2141,41 +2141,72 @@ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
  **/
 static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
 {
+       struct i40e_hw *hw = &pf->hw;
+       u8 flow_pctype = 0;
+       u64 i_set = 0;
+
        cmd->data = 0;
 
-       if (pf->vsi[pf->lan_vsi]->rxnfc.data != 0) {
-               cmd->data = pf->vsi[pf->lan_vsi]->rxnfc.data;
-               cmd->flow_type = pf->vsi[pf->lan_vsi]->rxnfc.flow_type;
-               return 0;
-       }
-       /* Report default options for RSS on i40e */
        switch (cmd->flow_type) {
        case TCP_V4_FLOW:
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+               break;
        case UDP_V4_FLOW:
-               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
-       /* fall through to add IP fields */
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+               break;
+       case TCP_V6_FLOW:
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+               break;
+       case UDP_V6_FLOW:
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+               break;
        case SCTP_V4_FLOW:
        case AH_ESP_V4_FLOW:
        case AH_V4_FLOW:
        case ESP_V4_FLOW:
        case IPV4_FLOW:
-               cmd->data |= RXH_IP_SRC | RXH_IP_DST;
-               break;
-       case TCP_V6_FLOW:
-       case UDP_V6_FLOW:
-               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
-       /* fall through to add IP fields */
        case SCTP_V6_FLOW:
        case AH_ESP_V6_FLOW:
        case AH_V6_FLOW:
        case ESP_V6_FLOW:
        case IPV6_FLOW:
+               /* Default is src/dest for IP, no matter the L4 hashing */
                cmd->data |= RXH_IP_SRC | RXH_IP_DST;
                break;
        default:
                return -EINVAL;
        }
 
+       /* Read flow based hash input set register */
+       if (flow_pctype) {
+               i_set = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+                                             flow_pctype)) |
+                       ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+                                              flow_pctype)) << 32);
+       }
+
+       /* Process bits of hash input set */
+       if (i_set) {
+               if (i_set & I40E_L4_SRC_MASK)
+                       cmd->data |= RXH_L4_B_0_1;
+               if (i_set & I40E_L4_DST_MASK)
+                       cmd->data |= RXH_L4_B_2_3;
+
+               if (cmd->flow_type == TCP_V4_FLOW ||
+                   cmd->flow_type == UDP_V4_FLOW) {
+                       if (i_set & I40E_L3_SRC_MASK)
+                               cmd->data |= RXH_IP_SRC;
+                       if (i_set & I40E_L3_DST_MASK)
+                               cmd->data |= RXH_IP_DST;
+               } else if (cmd->flow_type == TCP_V6_FLOW ||
+                         cmd->flow_type == UDP_V6_FLOW) {
+                       if (i_set & I40E_L3_V6_SRC_MASK)
+                               cmd->data |= RXH_IP_SRC;
+                       if (i_set & I40E_L3_V6_DST_MASK)
+                               cmd->data |= RXH_IP_DST;
+               }
+       }
+
        return 0;
 }
 
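The hash input set is a 64-bit mask split across two 32-bit registers, read here and again in the set path below. A hypothetical wrapper for the paired read (the helper name is assumed, not part of the patch):

        /* hypothetical helper: combine the two 32-bit INSET halves */
        static u64 i40e_read_hash_inset(struct i40e_hw *hw, u8 pctype)
        {
                return (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, pctype)) |
                       ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, pctype)) << 32);
        }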
@@ -2317,6 +2348,51 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
        return ret;
 }
 
+/**
+ * i40e_get_rss_hash_bits - Read RSS Hash bits from register
+ * @nfc: pointer to user request
+ * @i_setc bits currently set
+ *
+ * Returns value of bits to be set per user request
+ **/
+static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
+{
+       u64 i_set = i_setc;
+       u64 src_l3 = 0, dst_l3 = 0;
+
+       if (nfc->data & RXH_L4_B_0_1)
+               i_set |= I40E_L4_SRC_MASK;
+       else
+               i_set &= ~I40E_L4_SRC_MASK;
+       if (nfc->data & RXH_L4_B_2_3)
+               i_set |= I40E_L4_DST_MASK;
+       else
+               i_set &= ~I40E_L4_DST_MASK;
+
+       if (nfc->flow_type == TCP_V6_FLOW || nfc->flow_type == UDP_V6_FLOW) {
+               src_l3 = I40E_L3_V6_SRC_MASK;
+               dst_l3 = I40E_L3_V6_DST_MASK;
+       } else if (nfc->flow_type == TCP_V4_FLOW ||
+                 nfc->flow_type == UDP_V4_FLOW) {
+               src_l3 = I40E_L3_SRC_MASK;
+               dst_l3 = I40E_L3_DST_MASK;
+       } else {
+               /* Other flow types are not supported here */
+               return i_set;
+       }
+
+       if (nfc->data & RXH_IP_SRC)
+               i_set |= src_l3;
+       else
+               i_set &= ~src_l3;
+       if (nfc->data & RXH_IP_DST)
+               i_set |= dst_l3;
+       else
+               i_set &= ~dst_l3;
+
+       return i_set;
+}
+
 /**
  * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
  * @pf: pointer to the physical function struct
@@ -2329,6 +2405,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
        struct i40e_hw *hw = &pf->hw;
        u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
                   ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
+       u8 flow_pctype = 0;
+       u64 i_set, i_setc;
 
        /* RSS does not support anything other than hashing
         * to queues on src and dst IPs and ports
@@ -2337,75 +2415,39 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
                          RXH_L4_B_0_1 | RXH_L4_B_2_3))
                return -EINVAL;
 
-       /* We need at least the IP SRC and DEST fields for hashing */
-       if (!(nfc->data & RXH_IP_SRC) ||
-           !(nfc->data & RXH_IP_DST))
-               return -EINVAL;
-
        switch (nfc->flow_type) {
        case TCP_V4_FLOW:
-               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-               case 0:
-                       return -EINVAL;
-               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-                       if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-                               hena |=
-                          BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
-
-                       hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
-                       break;
-               default:
-                       return -EINVAL;
-               }
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+               if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+                       hena |=
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
                break;
        case TCP_V6_FLOW:
-               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-               case 0:
-                       return -EINVAL;
-               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-                       if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-                               hena |=
-                          BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
-
-                       hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
-                       break;
-               default:
-                       return -EINVAL;
-               }
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+               if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+                       hena |=
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
                break;
        case UDP_V4_FLOW:
-               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-               case 0:
-                       return -EINVAL;
-               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-                       if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-                               hena |=
-                           BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
-                           BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
-
-                       hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
-                                BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4));
-                       break;
-               default:
-                       return -EINVAL;
-               }
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+               if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+                       hena |=
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+
+               hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4);
                break;
        case UDP_V6_FLOW:
-               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-               case 0:
-                       return -EINVAL;
-               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-                       if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-                               hena |=
-                           BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
-                           BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
-
-                       hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
-                                BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6));
-                       break;
-               default:
-                       return -EINVAL;
-               }
+               flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+               if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+                       hena |=
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+                         BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+
+               hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6);
                break;
        case AH_ESP_V4_FLOW:
        case AH_V4_FLOW:
@@ -2437,13 +2479,23 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
                return -EINVAL;
        }
 
+       if (flow_pctype) {
+               i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+                                              flow_pctype)) |
+                       ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+                                              flow_pctype)) << 32);
+               i_set = i40e_get_rss_hash_bits(nfc, i_setc);
+               i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype),
+                                 (u32)i_set);
+               i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype),
+                                 (u32)(i_set >> 32));
+               hena |= BIT_ULL(flow_pctype);
+       }
+
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
        i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
        i40e_flush(hw);
 
-       /* Save setting for future output/update */
-       pf->vsi[pf->lan_vsi]->rxnfc = *nfc;
-
        return 0;
 }
 
@@ -2744,11 +2796,15 @@ static void i40e_get_channels(struct net_device *dev,
 static int i40e_set_channels(struct net_device *dev,
                              struct ethtool_channels *ch)
 {
+       const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
        struct i40e_netdev_priv *np = netdev_priv(dev);
        unsigned int count = ch->combined_count;
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
+       struct i40e_fdir_filter *rule;
+       struct hlist_node *node2;
        int new_count;
+       int err = 0;
 
        /* We do not support setting channels for any other VSI at present */
        if (vsi->type != I40E_VSI_MAIN)
@@ -2766,6 +2822,26 @@ static int i40e_set_channels(struct net_device *dev,
        if (count > i40e_max_channels(vsi))
                return -EINVAL;
 
+       /* verify that the number of channels does not invalidate any current
+        * flow director rules
+        */
+       hlist_for_each_entry_safe(rule, node2,
+                                 &pf->fdir_filter_list, fdir_node) {
+               if (rule->dest_ctl != drop && count <= rule->q_index) {
+                       dev_warn(&pf->pdev->dev,
+                                "Existing user defined filter %d assigns flow to queue %d\n",
+                                rule->fd_id, rule->q_index);
+                       err = -EINVAL;
+               }
+       }
+
+       if (err) {
+               dev_err(&pf->pdev->dev,
+                       "Existing filter rules must be deleted to reduce combined channel count to %d\n",
+                       count);
+               return err;
+       }
+
        /* update feature limits from largest to smallest supported values */
        /* TODO: Flow director limit, DCB etc */
 
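With this check in place, shrinking the channel count is refused while a user-defined flow director rule still steers traffic to a queue that would vanish; roughly:

        # steer a flow to queue 10, then try to shrink below it
        ethtool -N eth0 flow-type tcp4 dst-port 80 action 10
        ethtool -L eth0 combined 8    # rejected until the rule is deleted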
index c6ac7a6..61b0fc4 100644 (file)
@@ -41,7 +41,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 11
+#define DRV_VERSION_BUILD 12
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -527,6 +527,7 @@ void i40e_pf_reset_stats(struct i40e_pf *pf)
                        pf->veb[i]->stat_offsets_loaded = false;
                }
        }
+       pf->hw_csum_rx_error = 0;
 }
 
 /**
@@ -4616,7 +4617,7 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
 static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 {
        struct i40e_hw *hw = &pf->hw;
-       u8 i, enabled_tc;
+       u8 i, enabled_tc = 1;
        u8 num_tc = 0;
        struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
 
@@ -4634,8 +4635,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
        else
                return 1; /* Only TC0 */
 
-       /* At least have TC0 */
-       enabled_tc = (enabled_tc ? enabled_tc : 0x1);
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
                if (enabled_tc & BIT(i))
                        num_tc++;
@@ -5113,9 +5112,13 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
                                       DCB_CAP_DCBX_VER_IEEE;
 
                        pf->flags |= I40E_FLAG_DCB_CAPABLE;
-                       /* Enable DCB tagging only when more than one TC */
+                       /* Enable DCB tagging only when more than one TC
+                        * or explicitly disable if only one TC
+                        */
                        if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
                                pf->flags |= I40E_FLAG_DCB_ENABLED;
+                       else
+                               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
                        dev_dbg(&pf->pdev->dev,
                                "DCBX offload is supported for this PF.\n");
                }
@@ -5431,7 +5434,6 @@ int i40e_open(struct net_device *netdev)
        wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
 
        udp_tunnel_get_rx_info(netdev);
-       i40e_notify_client_of_netdev_open(vsi);
 
        return 0;
 }
@@ -5717,7 +5719,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
        u8 type;
 
        /* Not DCB capable or capability disabled */
-       if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                return ret;
 
        /* Ignore if event is not for Nearest Bridge */
@@ -7897,6 +7899,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 #endif
                                       I40E_FLAG_RSS_ENABLED    |
                                       I40E_FLAG_DCB_CAPABLE    |
+                                      I40E_FLAG_DCB_ENABLED    |
                                       I40E_FLAG_SRIOV_ENABLED  |
                                       I40E_FLAG_FD_SB_ENABLED  |
                                       I40E_FLAG_FD_ATR_ENABLED |
@@ -7986,72 +7989,34 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
 static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
                              u8 *lut, u16 lut_size)
 {
-       struct i40e_aqc_get_set_rss_key_data rss_key;
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
-       bool pf_lut = false;
-       u8 *rss_lut;
-       int ret, i;
-
-       memcpy(&rss_key, seed, sizeof(rss_key));
-
-       rss_lut = kzalloc(pf->rss_table_size, GFP_KERNEL);
-       if (!rss_lut)
-               return -ENOMEM;
-
-       /* Populate the LUT with max no. of queues in round robin fashion */
-       for (i = 0; i < vsi->rss_table_size; i++)
-               rss_lut[i] = i % vsi->rss_size;
+       int ret = 0;
 
-       ret = i40e_aq_set_rss_key(hw, vsi->id, &rss_key);
-       if (ret) {
-               dev_info(&pf->pdev->dev,
-                        "Cannot set RSS key, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-               goto config_rss_aq_out;
+       if (seed) {
+               struct i40e_aqc_get_set_rss_key_data *seed_dw =
+                       (struct i40e_aqc_get_set_rss_key_data *)seed;
+               ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS key, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
        }
+       if (lut) {
+               bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
 
-       if (vsi->type == I40E_VSI_MAIN)
-               pf_lut = true;
-
-       ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, rss_lut,
-                                 vsi->rss_table_size);
-       if (ret)
-               dev_info(&pf->pdev->dev,
-                        "Cannot set RSS lut, err %s aq_err %s\n",
-                        i40e_stat_str(&pf->hw, ret),
-                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-
-config_rss_aq_out:
-       kfree(rss_lut);
-       return ret;
-}
-
-/**
- * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
- * @vsi: VSI structure
- **/
-static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
-{
-       u8 seed[I40E_HKEY_ARRAY_SIZE];
-       struct i40e_pf *pf = vsi->back;
-       u8 *lut;
-       int ret;
-
-       if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
-               return 0;
-
-       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
-       if (!lut)
-               return -ENOMEM;
-
-       i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
-       netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-       vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs);
-       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
-       kfree(lut);
-
+               ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Cannot set RSS lut, err %s aq_err %s\n",
+                                i40e_stat_str(hw, ret),
+                                i40e_aq_str(hw, hw->aq.asq_last_status));
+                       return ret;
+               }
+       }
        return ret;
 }
 
@@ -8101,6 +8066,46 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
        return ret;
 }
 
+/**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+{
+       u8 seed[I40E_HKEY_ARRAY_SIZE];
+       struct i40e_pf *pf = vsi->back;
+       u8 *lut;
+       int ret;
+
+       if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
+               return 0;
+
+       if (!vsi->rss_size)
+               vsi->rss_size = min_t(int, pf->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+       if (!vsi->rss_size)
+               return -EINVAL;
+
+       lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+       if (!lut)
+               return -ENOMEM;
+       /* Use the user configured hash keys and lookup table if there is one,
+        * otherwise use default
+        */
+       if (vsi->rss_lut_user)
+               memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+       else
+               i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+       if (vsi->rss_hkey_user)
+               memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+       else
+               netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+       ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+       kfree(lut);
+
+       return ret;
+}
+
 /**
  * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
  * @vsi: Pointer to vsi structure
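After the rewrite, both arguments to i40e_config_rss_aq() are optional, so callers can update the key and the lookup table independently. A sketch of a key-only refresh under that assumption:

        /* sketch: refresh only the RSS hash key, leave the LUT untouched */
        u8 seed[I40E_HKEY_ARRAY_SIZE];

        netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
        ret = i40e_config_rss_aq(vsi, seed, NULL, 0);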
@@ -8691,6 +8696,28 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
        return need_reset;
 }
 
+/**
+ * i40e_clear_rss_lut - clear the rx hash lookup table
+ * @vsi: the VSI being configured
+ **/
+static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u16 vf_id = vsi->vf_id;
+       u8 i;
+
+       if (vsi->type == I40E_VSI_MAIN) {
+               for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+                       wr32(hw, I40E_PFQF_HLUT(i), 0);
+       } else if (vsi->type == I40E_VSI_SRIOV) {
+               for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+                       i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
+       } else {
+               dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
+       }
+}
+
 /**
  * i40e_set_features - set the netdev feature flags
  * @netdev: ptr to the netdev being adjusted
@@ -8704,6 +8731,12 @@ static int i40e_set_features(struct net_device *netdev,
        struct i40e_pf *pf = vsi->back;
        bool need_reset;
 
+       if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
+               i40e_pf_config_rss(pf);
+       else if (!(features & NETIF_F_RXHASH) &&
+                netdev->features & NETIF_F_RXHASH)
+               i40e_clear_rss_lut(vsi);
+
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
                i40e_vlan_stripping_enable(vsi);
        else
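The NETIF_F_RXHASH transitions now reprogram or clear the RSS lookup table on the spot; from userspace the toggle corresponds to:

        ethtool -K eth0 rxhash off    # clears the RSS LUT
        ethtool -K eth0 rxhash on     # restores the PF RSS configuration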
@@ -10503,6 +10536,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
                               I40E_FLAG_FD_SB_ENABLED  |
                               I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_DCB_CAPABLE    |
+                              I40E_FLAG_DCB_ENABLED    |
                               I40E_FLAG_SRIOV_ENABLED  |
                               I40E_FLAG_VMDQ_ENABLED);
        } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
@@ -10526,7 +10560,8 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
                /* Not enough queues for all TCs */
                if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
                    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
-                       pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+                       pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
+                                       I40E_FLAG_DCB_ENABLED);
                        dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
                }
                pf->num_lan_qps = max_t(int, pf->rss_size_max,
@@ -10923,7 +10958,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        err = i40e_init_pf_dcb(pf);
        if (err) {
                dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
-               pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+               pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
                /* Continue without DCB enabled */
        }
 #endif /* CONFIG_I40E_DCB */
@@ -11576,7 +11611,8 @@ static int __init i40e_init_module(void)
         * it can't be any worse than using the system workqueue which
         * was already single threaded
         */
-       i40e_wq = create_singlethread_workqueue(i40e_driver_name);
+       i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+                                 i40e_driver_name);
        if (!i40e_wq) {
                pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
                return -ENOMEM;
index df7ecc9..f8d6623 100644 (file)
@@ -2840,10 +2840,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                                                  I40E_TXD_QW1_CMD_SHIFT);
 
        /* notify HW of packet */
-       if (!tail_bump)
+       if (!tail_bump) {
                prefetchw(tx_desc + 1);
-
-       if (tail_bump) {
+       } else {
                /* Force memory writes to complete before letting h/w
                 * know there are new descriptors to fetch.  (Only
                 * applicable for weak-ordered memory model archs,
@@ -2852,7 +2851,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                wmb();
                writel(i, tx_ring->tail);
        }
-
        return;
 
 dma_error:
index 6fcbf76..da34235 100644 (file)
@@ -991,7 +991,10 @@ complete_reset:
                i40e_enable_vf_mappings(vf);
                set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
                clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
-               i40e_notify_client_of_vf_reset(pf, abs_vf_id);
+               /* Do not notify the client during VF init */
+               if (vf->pf->num_alloc_vfs)
+                       i40e_notify_client_of_vf_reset(pf, abs_vf_id);
+               vf->num_vlan = 0;
        }
        /* tell the VF the reset is done */
        wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@ -1089,7 +1092,6 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
                        goto err_iov;
                }
        }
-       i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
        /* allocate memory */
        vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
        if (!vfs) {
@@ -1113,6 +1115,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
        }
        pf->num_alloc_vfs = num_alloc_vfs;
 
+       i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
+
 err_alloc:
        if (ret)
                i40e_free_vfs(pf);
@@ -2314,6 +2318,7 @@ err:
        /* send the response back to the VF */
        aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS,
                                        aq_ret, (u8 *)vrh, len);
+       kfree(vrh);
        return aq_ret;
 }
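The added kfree(vrh) plugs a leak: the buffer holding the RSS HENA capabilities must be freed once the reply has gone out, on success and failure alike. The general shape of that allocate/reply/free pattern, sketched with hypothetical names:

    /* Sketch only; example_vf and example_send_msg are illustrative
     * stand-ins, not driver symbols.
     */
    static int example_send_caps(struct example_vf *vf, size_t len)
    {
            u8 *buf;
            int ret;

            buf = kzalloc(len, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            ret = example_send_msg(vf, buf, len);
            kfree(buf);     /* freed on every path after the reply */
            return ret;
    }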
 
@@ -2995,6 +3000,7 @@ int i40e_ndo_get_vf_config(struct net_device *netdev,
        else
                ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
        ivi->spoofchk = vf->spoofchk;
+       ivi->trusted = vf->trusted;
        ret = 0;
 
 error_param:
index 3114dcf..40b0eaf 100644 (file)
@@ -204,6 +204,9 @@ enum i40e_admin_queue_opc {
        i40e_aqc_opc_suspend_port_tx                            = 0x041B,
        i40e_aqc_opc_resume_port_tx                             = 0x041C,
        i40e_aqc_opc_configure_partition_bw                     = 0x041D,
+       /* hmc */
+       i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
+       i40e_aqc_opc_set_hmc_resource_profile   = 0x0501,
 
        /* phy commands */
        i40e_aqc_opc_get_phy_abilities          = 0x0600,
@@ -447,13 +450,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
 /* Set ARP Proxy command / response (indirect 0x0104) */
 struct i40e_aqc_arp_proxy_data {
        __le16  command_flags;
-#define I40E_AQ_ARP_INIT_IPV4  0x0008
-#define I40E_AQ_ARP_UNSUP_CTL  0x0010
-#define I40E_AQ_ARP_ENA                0x0020
-#define I40E_AQ_ARP_ADD_IPV4   0x0040
-#define I40E_AQ_ARP_DEL_IPV4   0x0080
+#define I40E_AQ_ARP_INIT_IPV4  0x0800
+#define I40E_AQ_ARP_UNSUP_CTL  0x1000
+#define I40E_AQ_ARP_ENA                0x2000
+#define I40E_AQ_ARP_ADD_IPV4   0x4000
+#define I40E_AQ_ARP_DEL_IPV4   0x8000
        __le16  table_id;
-       __le32  pfpm_proxyfc;
+       __le32  enabled_offloads;
+#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE    0x00000020
+#define I40E_AQ_ARP_OFFLOAD_ENABLE             0x00000800
        __le32  ip_addr;
        u8      mac_addr[6];
        u8      reserved[2];
@@ -468,17 +473,19 @@ struct i40e_aqc_ns_proxy_data {
        __le16  table_idx_ipv6_0;
        __le16  table_idx_ipv6_1;
        __le16  control;
-#define I40E_AQ_NS_PROXY_ADD_0         0x0100
-#define I40E_AQ_NS_PROXY_DEL_0         0x0200
-#define I40E_AQ_NS_PROXY_ADD_1         0x0400
-#define I40E_AQ_NS_PROXY_DEL_1         0x0800
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x1000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x2000
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x4000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x8000
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0001
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0004
+#define I40E_AQ_NS_PROXY_ADD_0         0x0001
+#define I40E_AQ_NS_PROXY_DEL_0         0x0002
+#define I40E_AQ_NS_PROXY_ADD_1         0x0004
+#define I40E_AQ_NS_PROXY_DEL_1         0x0008
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0    0x0010
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0    0x0020
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1    0x0040
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1    0x0080
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ   0x0100
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL  0x0400
+#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE        0x0800
+#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE       0x1000
        u8      mac_addr_0[6];
        u8      mac_addr_1[6];
        u8      local_mac_addr[6];
@@ -1579,6 +1586,24 @@ struct i40e_aqc_configure_partition_bw_data {
 
 I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
 
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+       u8      pm_profile;
+       u8      pe_vf_enabled;
+       u8      reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+       /* I40E_HMC_PROFILE_NO_CHANGE   = 0, reserved */
+       I40E_HMC_PROFILE_DEFAULT        = 1,
+       I40E_HMC_PROFILE_FAVOR_VF       = 2,
+       I40E_HMC_PROFILE_EQUAL          = 3,
+};
+
 /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
 
 /* set in param0 for get phy abilities to report qualified modules */
index a579193..0130458 100644 (file)
@@ -2068,10 +2068,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                                                  I40E_TXD_QW1_CMD_SHIFT);
 
        /* notify HW of packet */
-       if (!tail_bump)
+       if (!tail_bump) {
                prefetchw(tx_desc + 1);
-
-       if (tail_bump) {
+       } else {
                /* Force memory writes to complete before letting h/w
                 * know there are new descriptors to fetch.  (Only
                 * applicable for weak-ordered memory model archs,
@@ -2080,7 +2079,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                wmb();
                writel(i, tx_ring->tail);
        }
-
        return;
 
 dma_error:
index 76ed97d..dc00aaf 100644 (file)
@@ -71,20 +71,20 @@ struct i40e_vsi {
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define I40EVF_RX_BUFFER_WRITE 16      /* Must be power of 2 */
-#define I40EVF_DEFAULT_TXD   512
-#define I40EVF_DEFAULT_RXD   512
-#define I40EVF_MAX_TXD       4096
-#define I40EVF_MIN_TXD       64
-#define I40EVF_MAX_RXD       4096
-#define I40EVF_MIN_RXD       64
-#define I40EVF_REQ_DESCRIPTOR_MULTIPLE  32
+#define I40EVF_DEFAULT_TXD     512
+#define I40EVF_DEFAULT_RXD     512
+#define I40EVF_MAX_TXD         4096
+#define I40EVF_MIN_TXD         64
+#define I40EVF_MAX_RXD         4096
+#define I40EVF_MIN_RXD         64
+#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32
 
 /* Supported Rx Buffer Sizes */
-#define I40EVF_RXBUFFER_2048  2048
-#define I40EVF_MAX_RXBUFFER   16384  /* largest size for single descriptor */
-#define I40EVF_MAX_AQ_BUF_SIZE    4096
-#define I40EVF_AQ_LEN             32
-#define I40EVF_AQ_MAX_ERR         20 /* times to try before resetting AQ */
+#define I40EVF_RXBUFFER_2048   2048
+#define I40EVF_MAX_RXBUFFER    16384  /* largest size for single descriptor */
+#define I40EVF_MAX_AQ_BUF_SIZE 4096
+#define I40EVF_AQ_LEN          32
+#define I40EVF_AQ_MAX_ERR      20 /* times to try before resetting AQ */
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
@@ -111,7 +111,7 @@ struct i40e_q_vector {
        u8 num_ringpairs;       /* total number of ring pairs in vector */
 #define ITR_COUNTDOWN_START 100
        u8 itr_countdown;       /* when 0 or 1 update ITR */
-       int v_idx;        /* vector index in list */
+       int v_idx;      /* vector index in list */
        char name[IFNAMSIZ + 9];
        bool arm_wb_state;
        cpumask_var_t affinity_mask;
@@ -129,11 +129,11 @@ struct i40e_q_vector {
        ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
        (R)->next_to_clean - (R)->next_to_use - 1)
 
-#define I40EVF_RX_DESC_ADV(R, i)           \
+#define I40EVF_RX_DESC_ADV(R, i)       \
        (&(((union i40e_adv_rx_desc *)((R).desc))[i]))
-#define I40EVF_TX_DESC_ADV(R, i)           \
+#define I40EVF_TX_DESC_ADV(R, i)       \
        (&(((union i40e_adv_tx_desc *)((R).desc))[i]))
-#define I40EVF_TX_CTXTDESC_ADV(R, i)       \
+#define I40EVF_TX_CTXTDESC_ADV(R, i)   \
        (&(((struct i40e_adv_tx_context_desc *)((R).desc))[i]))
 
 #define OTHER_VECTOR 1
@@ -204,22 +204,25 @@ struct i40evf_adapter {
        struct msix_entry *msix_entries;
 
        u32 flags;
-#define I40EVF_FLAG_RX_CSUM_ENABLED              BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED                 BIT(5)
-#define I40EVF_FLAG_MQ_CAPABLE                   BIT(6)
-#define I40EVF_FLAG_NEED_LINK_UPDATE             BIT(7)
-#define I40EVF_FLAG_PF_COMMS_FAILED              BIT(8)
-#define I40EVF_FLAG_RESET_PENDING                BIT(9)
-#define I40EVF_FLAG_RESET_NEEDED                 BIT(10)
+#define I40EVF_FLAG_RX_CSUM_ENABLED            BIT(0)
+#define I40EVF_FLAG_IN_NETPOLL                 BIT(4)
+#define I40EVF_FLAG_IMIR_ENABLED               BIT(5)
+#define I40EVF_FLAG_MQ_CAPABLE                 BIT(6)
+#define I40EVF_FLAG_NEED_LINK_UPDATE           BIT(7)
+#define I40EVF_FLAG_PF_COMMS_FAILED            BIT(8)
+#define I40EVF_FLAG_RESET_PENDING              BIT(9)
+#define I40EVF_FLAG_RESET_NEEDED               BIT(10)
 #define I40EVF_FLAG_WB_ON_ITR_CAPABLE          BIT(11)
 #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE     BIT(12)
 #define I40EVF_FLAG_ADDR_SET_BY_PF             BIT(13)
+#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED   BIT(14)
 #define I40EVF_FLAG_PROMISC_ON                 BIT(15)
 #define I40EVF_FLAG_ALLMULTI_ON                        BIT(16)
 /* duplicates for common code */
-#define I40E_FLAG_FDIR_ATR_ENABLED              0
-#define I40E_FLAG_DCB_ENABLED                   0
-#define I40E_FLAG_RX_CSUM_ENABLED                I40EVF_FLAG_RX_CSUM_ENABLED
+#define I40E_FLAG_FDIR_ATR_ENABLED             0
+#define I40E_FLAG_DCB_ENABLED                  0
+#define I40E_FLAG_IN_NETPOLL                   I40EVF_FLAG_IN_NETPOLL
+#define I40E_FLAG_RX_CSUM_ENABLED              I40EVF_FLAG_RX_CSUM_ENABLED
 #define I40E_FLAG_WB_ON_ITR_CAPABLE            I40EVF_FLAG_WB_ON_ITR_CAPABLE
 #define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE       I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE
        /* flags for admin queue service task */
@@ -233,7 +236,7 @@ struct i40evf_adapter {
 #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES                BIT(6)
 #define I40EVF_FLAG_AQ_MAP_VECTORS             BIT(7)
 #define I40EVF_FLAG_AQ_HANDLE_RESET            BIT(8)
-#define I40EVF_FLAG_AQ_CONFIGURE_RSS           BIT(9)  /* direct AQ config */
+#define I40EVF_FLAG_AQ_CONFIGURE_RSS           BIT(9) /* direct AQ config */
 #define I40EVF_FLAG_AQ_GET_CONFIG              BIT(10)
 /* Newer style, RSS done by the PF so we can ignore hardware vagaries. */
 #define I40EVF_FLAG_AQ_GET_HENA                        BIT(11)
@@ -258,6 +261,7 @@ struct i40evf_adapter {
        struct work_struct watchdog_task;
        bool netdev_registered;
        bool link_up;
+       enum i40e_aq_link_speed link_speed;
        enum i40e_virtchnl_ops current_op;
 #define CLIENT_ENABLED(_a) ((_a)->vf_res ? \
                            (_a)->vf_res->vf_offload_flags & \
index c9c202f..e17a154 100644 (file)
@@ -74,13 +74,33 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = {
 static int i40evf_get_settings(struct net_device *netdev,
                               struct ethtool_cmd *ecmd)
 {
-       /* In the future the VF will be able to query the PF for
-        * some information - for now use a dummy value
-        */
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+
        ecmd->supported = 0;
        ecmd->autoneg = AUTONEG_DISABLE;
        ecmd->transceiver = XCVR_DUMMY1;
        ecmd->port = PORT_NONE;
+       /* Set speed and duplex */
+       switch (adapter->link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               ethtool_cmd_speed_set(ecmd, SPEED_40000);
+               break;
+       case I40E_LINK_SPEED_20GB:
+               ethtool_cmd_speed_set(ecmd, SPEED_20000);
+               break;
+       case I40E_LINK_SPEED_10GB:
+               ethtool_cmd_speed_set(ecmd, SPEED_10000);
+               break;
+       case I40E_LINK_SPEED_1GB:
+               ethtool_cmd_speed_set(ecmd, SPEED_1000);
+               break;
+       case I40E_LINK_SPEED_100MB:
+               ethtool_cmd_speed_set(ecmd, SPEED_100);
+               break;
+       default:
+               break;
+       }
+       ecmd->duplex = DUPLEX_FULL;
 
        return 0;
 }
index 600fb9c..f751f7b 100644 (file)
@@ -38,7 +38,7 @@ static const char i40evf_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 11
+#define DRV_VERSION_BUILD 12
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD) \
@@ -1420,7 +1420,9 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter)
 {
        int err;
 
+       rtnl_lock();
        err = i40evf_set_interrupt_capability(adapter);
+       rtnl_unlock();
        if (err) {
                dev_err(&adapter->pdev->dev,
                        "Unable to setup interrupt capabilities\n");
@@ -1802,6 +1804,8 @@ continue_reset:
        }
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
        adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+       /* Open RDMA Client again */
+       adapter->aq_required |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
        clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
        i40evf_misc_irq_enable(adapter);
 
@@ -2831,7 +2835,8 @@ static int __init i40evf_init_module(void)
 
        pr_info("%s\n", i40evf_copyright);
 
-       i40evf_wq = create_singlethread_workqueue(i40evf_driver_name);
+       i40evf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+                                   i40evf_driver_name);
        if (!i40evf_wq) {
                pr_err("%s: Failed to create workqueue\n", i40evf_driver_name);
                return -ENOMEM;
index d76c221..cc6cb30 100644 (file)
@@ -816,6 +816,45 @@ void i40evf_set_rss_lut(struct i40evf_adapter *adapter)
        kfree(vrl);
 }
 
+/**
+ * i40evf_print_link_message - print link up or down
+ * @adapter: adapter structure
+ *
+ * Log a message telling the world of our wondrous link status
+ */
+static void i40evf_print_link_message(struct i40evf_adapter *adapter)
+{
+       struct net_device *netdev = adapter->netdev;
+       char *speed = "Unknown ";
+
+       if (!adapter->link_up) {
+               netdev_info(netdev, "NIC Link is Down\n");
+               return;
+       }
+
+       switch (adapter->link_speed) {
+       case I40E_LINK_SPEED_40GB:
+               speed = "40 G";
+               break;
+       case I40E_LINK_SPEED_20GB:
+               speed = "20 G";
+               break;
+       case I40E_LINK_SPEED_10GB:
+               speed = "10 G";
+               break;
+       case I40E_LINK_SPEED_1GB:
+               speed = "1000 M";
+               break;
+       case I40E_LINK_SPEED_100MB:
+               speed = "100 M";
+               break;
+       default:
+               break;
+       }
+
+       netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
+}
+
 /**
  * i40evf_request_reset
  * @adapter: adapter structure
@@ -853,15 +892,13 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                        (struct i40e_virtchnl_pf_event *)msg;
                switch (vpe->event) {
                case I40E_VIRTCHNL_EVENT_LINK_CHANGE:
-                       adapter->link_up =
-                               vpe->event_data.link_event.link_status;
-                       if (adapter->link_up && !netif_carrier_ok(netdev)) {
-                               dev_info(&adapter->pdev->dev, "NIC Link is Up\n");
-                               netif_carrier_on(netdev);
-                               netif_tx_wake_all_queues(netdev);
-                       } else if (!adapter->link_up) {
-                               dev_info(&adapter->pdev->dev, "NIC Link is Down\n");
-                               netif_carrier_off(netdev);
+                       adapter->link_speed =
+                               vpe->event_data.link_event.link_speed;
+                       if (adapter->link_up !=
+                           vpe->event_data.link_event.link_status) {
+                               adapter->link_up =
+                                       vpe->event_data.link_event.link_status;
+                               i40evf_print_link_message(adapter);
                                netif_tx_stop_all_queues(netdev);
                        }
                        break;
index 199ff98..acf0605 100644 (file)
@@ -188,6 +188,11 @@ struct e1000_adv_tx_context_desc {
 /* ETQF register bit definitions */
 #define E1000_ETQF_FILTER_ENABLE   BIT(26)
 #define E1000_ETQF_1588            BIT(30)
+#define E1000_ETQF_IMM_INT         BIT(29)
+#define E1000_ETQF_QUEUE_ENABLE    BIT(31)
+#define E1000_ETQF_QUEUE_SHIFT     16
+#define E1000_ETQF_QUEUE_MASK      0x00070000
+#define E1000_ETQF_ETYPE_MASK      0x0000FFFF
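These new ETQF fields allow an EtherType match to be steered to a specific RX queue, as igb_rxnfc_write_etype_filter() does further down. A sketch of how one such register value is composed (EtherType and queue are illustrative):

    u32 etqf = 0;

    etqf |= 0x88F7 & E1000_ETQF_ETYPE_MASK;         /* EtherType, bits 15:0 */
    etqf |= (2 << E1000_ETQF_QUEUE_SHIFT) & E1000_ETQF_QUEUE_MASK;
    etqf |= E1000_ETQF_QUEUE_ENABLE | E1000_ETQF_FILTER_ENABLE;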
 
 /* FTQF register bit definitions */
 #define E1000_FTQF_VF_BP               0x00008000
index 2997c44..2688180 100644 (file)
 #define E1000_RTTBCNRC_RF_INT_MASK     \
        (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
 
+#define E1000_VLAPQF_QUEUE_SEL(_n, q_idx) ((q_idx) << ((_n) * 4))
+#define E1000_VLAPQF_P_VALID(_n)       (0x1 << (3 + (_n) * 4))
+#define E1000_VLAPQF_QUEUE_MASK        0x03
+
 #endif
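VLAPQF packs one nibble per VLAN priority: a 2-bit queue select plus a valid bit. A sketch of steering priority 5 to queue 2 with the new macros (values illustrative; the driver's version lives in igb_rxnfc_write_vlan_prio_filter()):

    u32 vlapqf = 0;

    vlapqf |= E1000_VLAPQF_P_VALID(5);          /* mark the priority-5 slot valid */
    vlapqf |= E1000_VLAPQF_QUEUE_SEL(5, 2);     /* queue 2 in that slot */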
index 21d9d02..d84afdd 100644 (file)
                                        (0x054E0 + ((_i - 16) * 8)))
 #define E1000_RAH(_i)  (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
                                        (0x054E4 + ((_i - 16) * 8)))
+#define E1000_VLAPQF   0x055B0  /* VLAN Priority Queue Filter VLAPQF */
 #define E1000_IP4AT_REG(_i)     (0x05840 + ((_i) * 8))
 #define E1000_IP6AT_REG(_i)     (0x05880 + ((_i) * 4))
 #define E1000_WUPM_REG(_i)      (0x05A00 + ((_i) * 4))
index 5387b3a..03fbe4b 100644 (file)
@@ -350,11 +350,49 @@ struct hwmon_buff {
        };
 #endif
 
+/* The number of L2 ether-type filter registers; index 3 is reserved
+ * for the PTP 1588 timestamp filter.
+ */
+#define MAX_ETYPE_FILTER       (4 - 1)
+/* ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters here!!
+ *
+ * Current filters:            Filter 3
+ */
+#define IGB_ETQF_FILTER_1588   3
+
 #define IGB_N_EXTTS    2
 #define IGB_N_PEROUT   2
 #define IGB_N_SDP      4
 #define IGB_RETA_SIZE  128
 
+enum igb_filter_match_flags {
+       IGB_FILTER_FLAG_ETHER_TYPE = 0x1,
+       IGB_FILTER_FLAG_VLAN_TCI   = 0x2,
+};
+
+#define IGB_MAX_RXNFC_FILTERS 16
+
+/* RX network flow classification data structure */
+struct igb_nfc_input {
+       /* Byte layout in order, all values with MSB first:
+        * match_flags - 1 byte
+        * etype - 2 bytes
+        * vlan_tci - 2 bytes
+        */
+       u8 match_flags;
+       __be16 etype;
+       __be16 vlan_tci;
+};
+
+struct igb_nfc_filter {
+       struct hlist_node nfc_node;
+       struct igb_nfc_input filter;
+       u16 etype_reg_index;
+       u16 sw_idx;
+       u16 action;
+};
+
 /* board specific private data structure */
 struct igb_adapter {
        unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -473,6 +511,13 @@ struct igb_adapter {
        int copper_tries;
        struct e1000_info ei;
        u16 eee_advert;
+
+       /* RX network flow classification support */
+       struct hlist_head nfc_filter_list;
+       unsigned int nfc_filter_count;
+       /* lock for RX network flow classification filter */
+       spinlock_t nfc_lock;
+       bool etype_bitmap[MAX_ETYPE_FILTER];
 };
 
 /* flags controlling PTP/1588 function */
@@ -599,4 +644,9 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
        return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
 }
 
+int igb_add_filter(struct igb_adapter *adapter,
+                  struct igb_nfc_filter *input);
+int igb_erase_filter(struct igb_adapter *adapter,
+                    struct igb_nfc_filter *input);
+
 #endif /* _IGB_H_ */
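For orientation, this is what a populated filter node looks like before it is handed to igb_add_filter(); the values are illustrative only:

    struct igb_nfc_filter f = {
            .filter = {
                    .match_flags = IGB_FILTER_FLAG_ETHER_TYPE |
                                   IGB_FILTER_FLAG_VLAN_TCI,
                    .etype       = htons(0x88F7),               /* EtherType */
                    .vlan_tci    = htons(5 << VLAN_PRIO_SHIFT), /* priority 5 */
            },
            .sw_idx = 0,        /* ethtool rule location */
            .action = 2,        /* destination RX queue */
    };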
index 64e91c5..0c33eca 100644 (file)
@@ -2431,6 +2431,63 @@ static int igb_get_ts_info(struct net_device *dev,
        }
 }
 
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
+                                    struct ethtool_rxnfc *cmd)
+{
+       struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+       struct igb_nfc_filter *rule = NULL;
+
+       /* report total rule count */
+       cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+               if (fsp->location <= rule->sw_idx)
+                       break;
+       }
+
+       if (!rule || fsp->location != rule->sw_idx)
+               return -EINVAL;
+
+       if (rule->filter.match_flags) {
+               fsp->flow_type = ETHER_FLOW;
+               fsp->ring_cookie = rule->action;
+               if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+                       fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+                       fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+               }
+               if (rule->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) {
+                       fsp->flow_type |= FLOW_EXT;
+                       fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+                       fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
+               }
+               return 0;
+       }
+       return -EINVAL;
+}
+
+static int igb_get_ethtool_nfc_all(struct igb_adapter *adapter,
+                                  struct ethtool_rxnfc *cmd,
+                                  u32 *rule_locs)
+{
+       struct igb_nfc_filter *rule;
+       int cnt = 0;
+
+       /* report total rule count */
+       cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+               if (cnt == cmd->rule_cnt)
+                       return -EMSGSIZE;
+               rule_locs[cnt] = rule->sw_idx;
+               cnt++;
+       }
+
+       cmd->rule_cnt = cnt;
+
+       return 0;
+}
+
 static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
                                 struct ethtool_rxnfc *cmd)
 {
@@ -2484,6 +2541,16 @@ static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                cmd->data = adapter->num_rx_queues;
                ret = 0;
                break;
+       case ETHTOOL_GRXCLSRLCNT:
+               cmd->rule_cnt = adapter->nfc_filter_count;
+               ret = 0;
+               break;
+       case ETHTOOL_GRXCLSRULE:
+               ret = igb_get_ethtool_nfc_entry(adapter, cmd);
+               break;
+       case ETHTOOL_GRXCLSRLALL:
+               ret = igb_get_ethtool_nfc_all(adapter, cmd, rule_locs);
+               break;
        case ETHTOOL_GRXFH:
                ret = igb_get_rss_hash_opts(adapter, cmd);
                break;
@@ -2598,6 +2665,279 @@ static int igb_set_rss_hash_opt(struct igb_adapter *adapter,
        return 0;
 }
 
+static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter,
+                                       struct igb_nfc_filter *input)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u8 i;
+       u32 etqf;
+       u16 etype;
+
+       /* find an empty etype filter register */
+       for (i = 0; i < MAX_ETYPE_FILTER; ++i) {
+               if (!adapter->etype_bitmap[i])
+                       break;
+       }
+       if (i == MAX_ETYPE_FILTER) {
+               dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n");
+               return -EINVAL;
+       }
+
+       adapter->etype_bitmap[i] = true;
+
+       etqf = rd32(E1000_ETQF(i));
+       etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK);
+
+       etqf |= E1000_ETQF_FILTER_ENABLE;
+       etqf &= ~E1000_ETQF_ETYPE_MASK;
+       etqf |= (etype & E1000_ETQF_ETYPE_MASK);
+
+       etqf &= ~E1000_ETQF_QUEUE_MASK;
+       etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT)
+               & E1000_ETQF_QUEUE_MASK);
+       etqf |= E1000_ETQF_QUEUE_ENABLE;
+
+       wr32(E1000_ETQF(i), etqf);
+
+       input->etype_reg_index = i;
+
+       return 0;
+}
+
+static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
+                                           struct igb_nfc_filter *input)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u8 vlan_priority;
+       u16 queue_index;
+       u32 vlapqf;
+
+       vlapqf = rd32(E1000_VLAPQF);
+       vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
+                               >> VLAN_PRIO_SHIFT;
+       queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK;
+
+       /* check whether this vlan prio is already set */
+       if ((vlapqf & E1000_VLAPQF_P_VALID(vlan_priority)) &&
+           (queue_index != input->action)) {
+               dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n");
+               return -EEXIST;
+       }
+
+       vlapqf |= E1000_VLAPQF_P_VALID(vlan_priority);
+       vlapqf |= E1000_VLAPQF_QUEUE_SEL(vlan_priority, input->action);
+
+       wr32(E1000_VLAPQF, vlapqf);
+
+       return 0;
+}
+
+int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+       int err = -EINVAL;
+
+       if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+               err = igb_rxnfc_write_etype_filter(adapter, input);
+               if (err)
+                       return err;
+       }
+
+       if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+               err = igb_rxnfc_write_vlan_prio_filter(adapter, input);
+
+       return err;
+}
+
+static void igb_clear_etype_filter_regs(struct igb_adapter *adapter,
+                                       u16 reg_index)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 etqf = rd32(E1000_ETQF(reg_index));
+
+       etqf &= ~E1000_ETQF_QUEUE_ENABLE;
+       etqf &= ~E1000_ETQF_QUEUE_MASK;
+       etqf &= ~E1000_ETQF_FILTER_ENABLE;
+
+       wr32(E1000_ETQF(reg_index), etqf);
+
+       adapter->etype_bitmap[reg_index] = false;
+}
+
+static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter,
+                                      u16 vlan_tci)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u8 vlan_priority;
+       u32 vlapqf;
+
+       vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+       vlapqf = rd32(E1000_VLAPQF);
+       vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority);
+       vlapqf &= ~E1000_VLAPQF_QUEUE_SEL(vlan_priority,
+                                               E1000_VLAPQF_QUEUE_MASK);
+
+       wr32(E1000_VLAPQF, vlapqf);
+}
+
+int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+       if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE)
+               igb_clear_etype_filter_regs(adapter,
+                                           input->etype_reg_index);
+
+       if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+               igb_clear_vlan_prio_filter(adapter,
+                                          ntohs(input->filter.vlan_tci));
+
+       return 0;
+}
+
+static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter,
+                                       struct igb_nfc_filter *input,
+                                       u16 sw_idx)
+{
+       struct igb_nfc_filter *rule, *parent;
+       int err = -EINVAL;
+
+       parent = NULL;
+       rule = NULL;
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+               /* hash found, or no matching entry */
+               if (rule->sw_idx >= sw_idx)
+                       break;
+               parent = rule;
+       }
+
+       /* if there is an old rule occupying our place remove it */
+       if (rule && (rule->sw_idx == sw_idx)) {
+               if (!input)
+                       err = igb_erase_filter(adapter, rule);
+
+               hlist_del(&rule->nfc_node);
+               kfree(rule);
+               adapter->nfc_filter_count--;
+       }
+
+       /* If no input was provided this was a delete request: err is 0 if
+        * a rule was found and removed from the list, otherwise -EINVAL.
+        */
+       if (!input)
+               return err;
+
+       /* initialize node */
+       INIT_HLIST_NODE(&input->nfc_node);
+
+       /* add filter to the list */
+       if (parent)
+               hlist_add_behind(&parent->nfc_node, &input->nfc_node);
+       else
+               hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
+
+       /* update counts */
+       adapter->nfc_filter_count++;
+
+       return 0;
+}
+
+static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
+                                    struct ethtool_rxnfc *cmd)
+{
+       struct net_device *netdev = adapter->netdev;
+       struct ethtool_rx_flow_spec *fsp =
+               (struct ethtool_rx_flow_spec *)&cmd->fs;
+       struct igb_nfc_filter *input, *rule;
+       int err = 0;
+
+       if (!(netdev->hw_features & NETIF_F_NTUPLE))
+               return -EOPNOTSUPP;
+
+       /* Don't allow programming if the action is a drop or the target
+        * queue is beyond the number of online Rx queues.
+        */
+       if ((fsp->ring_cookie == RX_CLS_FLOW_DISC) ||
+           (fsp->ring_cookie >= adapter->num_rx_queues)) {
+               dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n");
+               return -EINVAL;
+       }
+
+       /* Don't allow indexes to exist outside of available space */
+       if (fsp->location >= IGB_MAX_RXNFC_FILTERS) {
+               dev_err(&adapter->pdev->dev, "Location out of range\n");
+               return -EINVAL;
+       }
+
+       if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
+               return -EINVAL;
+
+       if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK &&
+           fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK))
+               return -EINVAL;
+
+       input = kzalloc(sizeof(*input), GFP_KERNEL);
+       if (!input)
+               return -ENOMEM;
+
+       if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
+               input->filter.etype = fsp->h_u.ether_spec.h_proto;
+               input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
+       }
+
+       if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+               if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
+                       err = -EINVAL;
+                       goto err_out;
+               }
+               input->filter.vlan_tci = fsp->h_ext.vlan_tci;
+               input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+       }
+
+       input->action = fsp->ring_cookie;
+       input->sw_idx = fsp->location;
+
+       spin_lock(&adapter->nfc_lock);
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+               if (!memcmp(&input->filter, &rule->filter,
+                           sizeof(input->filter))) {
+                       err = -EEXIST;
+                       dev_err(&adapter->pdev->dev,
+                               "ethtool: this filter is already set\n");
+                       goto err_out_w_lock;
+               }
+       }
+
+       err = igb_add_filter(adapter, input);
+       if (err)
+               goto err_out_w_lock;
+
+       igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx);
+
+       spin_unlock(&adapter->nfc_lock);
+       return 0;
+
+err_out_w_lock:
+       spin_unlock(&adapter->nfc_lock);
+err_out:
+       kfree(input);
+       return err;
+}
+
+static int igb_del_ethtool_nfc_entry(struct igb_adapter *adapter,
+                                    struct ethtool_rxnfc *cmd)
+{
+       struct ethtool_rx_flow_spec *fsp =
+               (struct ethtool_rx_flow_spec *)&cmd->fs;
+       int err;
+
+       spin_lock(&adapter->nfc_lock);
+       err = igb_update_ethtool_nfc_entry(adapter, NULL, fsp->location);
+       spin_unlock(&adapter->nfc_lock);
+
+       return err;
+}
+
 static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 {
        struct igb_adapter *adapter = netdev_priv(dev);
@@ -2607,6 +2947,11 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
        case ETHTOOL_SRXFH:
                ret = igb_set_rss_hash_opt(adapter, cmd);
                break;
+       case ETHTOOL_SRXCLSRLINS:
+               ret = igb_add_ethtool_nfc_entry(adapter, cmd);
+               break;
+       case ETHTOOL_SRXCLSRLDEL:
+               ret = igb_del_ethtool_nfc_entry(adapter, cmd);
+               break;
        default:
                break;
        }
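These two cases are driven from userspace ethtool. A sketch of the round trip, with the commands shown in comments (device name and values are illustrative):

    /* # ethtool -N eth0 flow-type ether proto 0x88f7 action 2 loc 0
     *      -> ETHTOOL_SRXCLSRLINS, reaching igb_add_ethtool_nfc_entry()
     * # ethtool -N eth0 delete 0
     *      -> ETHTOOL_SRXCLSRLDEL, reaching igb_del_ethtool_nfc_entry()
     */
    struct ethtool_rx_flow_spec fs = {
            .flow_type   = ETHER_FLOW,
            .ring_cookie = 2,       /* target RX queue */
            .location    = 0,       /* rule index */
    };

    fs.h_u.ether_spec.h_proto = htons(0x88F7);
    fs.m_u.ether_spec.h_proto = (__force __be16)~0; /* match full EtherType */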
index 942a89f..af75eac 100644 (file)
@@ -176,6 +176,8 @@ static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
+static void igb_nfc_filter_exit(struct igb_adapter *adapter);
+static void igb_nfc_filter_restore(struct igb_adapter *adapter);
 
 #ifdef CONFIG_PCI_IOV
 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
@@ -1611,6 +1613,7 @@ static void igb_configure(struct igb_adapter *adapter)
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);
 
+       igb_nfc_filter_restore(adapter);
        igb_configure_tx(adapter);
        igb_configure_rx(adapter);
 
@@ -2059,6 +2062,21 @@ static int igb_set_features(struct net_device *netdev,
        if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
                return 0;
 
+       if (!(features & NETIF_F_NTUPLE)) {
+               struct hlist_node *node2;
+               struct igb_nfc_filter *rule;
+
+               spin_lock(&adapter->nfc_lock);
+               hlist_for_each_entry_safe(rule, node2,
+                                         &adapter->nfc_filter_list, nfc_node) {
+                       igb_erase_filter(adapter, rule);
+                       hlist_del(&rule->nfc_node);
+                       kfree(rule);
+               }
+               spin_unlock(&adapter->nfc_lock);
+               adapter->nfc_filter_count = 0;
+       }
+
        netdev->features = features;
 
        if (netif_running(netdev))
@@ -3053,6 +3071,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
                                  VLAN_HLEN;
        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
+       spin_lock_init(&adapter->nfc_lock);
        spin_lock_init(&adapter->stats64_lock);
 #ifdef CONFIG_PCI_IOV
        switch (hw->mac.type) {
@@ -3240,6 +3259,8 @@ static int __igb_close(struct net_device *netdev, bool suspending)
        igb_down(adapter);
        igb_free_irq(adapter);
 
+       igb_nfc_filter_exit(adapter);
+
        igb_free_all_tx_resources(adapter);
        igb_free_all_rx_resources(adapter);
 
@@ -8306,4 +8327,28 @@ int igb_reinit_queues(struct igb_adapter *adapter)
 
        return err;
 }
+
+static void igb_nfc_filter_exit(struct igb_adapter *adapter)
+{
+       struct igb_nfc_filter *rule;
+
+       spin_lock(&adapter->nfc_lock);
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+               igb_erase_filter(adapter, rule);
+
+       spin_unlock(&adapter->nfc_lock);
+}
+
+static void igb_nfc_filter_restore(struct igb_adapter *adapter)
+{
+       struct igb_nfc_filter *rule;
+
+       spin_lock(&adapter->nfc_lock);
+
+       hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+               igb_add_filter(adapter, rule);
+
+       spin_unlock(&adapter->nfc_lock);
+}
 /* igb_main.c */
index 336c103..66dfa20 100644 (file)
@@ -998,12 +998,12 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
 
        /* define ethertype filter for timestamped packets */
        if (is_l2)
-               wr32(E1000_ETQF(3),
+               wr32(E1000_ETQF(IGB_ETQF_FILTER_1588),
                     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
                      E1000_ETQF_1588 | /* enable timestamping */
                      ETH_P_1588));     /* 1588 eth protocol type */
        else
-               wr32(E1000_ETQF(3), 0);
+               wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), 0);
 
        /* L4 Queue Filter[3]: filter by destination port and protocol */
        if (is_l4) {
index 9475ff9..b06e32d 100644 (file)
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
 #include "ixgbe_dcb.h"
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 #define IXGBE_FCOE
 #include "ixgbe_fcoe.h"
-#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
+#endif /* IS_ENABLED(CONFIG_FCOE) */
 #ifdef CONFIG_IXGBE_DCA
 #include <linux/dca.h>
 #endif
@@ -645,6 +645,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED         BIT(25)
 #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER     BIT(26)
 #define IXGBE_FLAG_DCB_CAPABLE                 BIT(27)
+#define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE      BIT(28)
 
        u32 flags2;
 #define IXGBE_FLAG2_RSC_CAPABLE                        BIT(0)
@@ -653,13 +654,12 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_TEMP_SENSOR_EVENT          BIT(3)
 #define IXGBE_FLAG2_SEARCH_FOR_SFP             BIT(4)
 #define IXGBE_FLAG2_SFP_NEEDS_RESET            BIT(5)
-#define IXGBE_FLAG2_RESET_REQUESTED            BIT(6)
 #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT       BIT(7)
 #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP         BIT(8)
 #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP         BIT(9)
 #define IXGBE_FLAG2_PTP_PPS_ENABLED            BIT(10)
 #define IXGBE_FLAG2_PHY_INTERRUPT              BIT(11)
-#define IXGBE_FLAG2_VXLAN_REREG_NEEDED         BIT(12)
+#define IXGBE_FLAG2_UDP_TUN_REREG_NEEDED       BIT(12)
 #define IXGBE_FLAG2_VLAN_PROMISC               BIT(13)
 
        /* Tx fast path data */
@@ -673,6 +673,7 @@ struct ixgbe_adapter {
 
        /* Port number used to identify VXLAN traffic */
        __be16 vxlan_port;
+       __be16 geneve_port;
 
        /* TX */
        struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
@@ -840,6 +841,7 @@ enum ixgbe_state_t {
        __IXGBE_IN_SFP_INIT,
        __IXGBE_PTP_RUNNING,
        __IXGBE_PTP_TX_IN_PROGRESS,
+       __IXGBE_RESET_REQUESTED,
 };
 
 struct ixgbe_cb {
index b4217f3..77d3039 100644 (file)
@@ -99,6 +99,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
                case IXGBE_DEV_ID_X550T:
                case IXGBE_DEV_ID_X550T1:
                case IXGBE_DEV_ID_X550EM_X_10G_T:
+               case IXGBE_DEV_ID_X550EM_A_10G_T:
                        supported = true;
                        break;
                default:
@@ -2958,8 +2959,10 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
        }
 
        /* was that the last pool using this rar? */
-       if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+       if (mpsar_lo == 0 && mpsar_hi == 0 &&
+           rar != 0 && rar != hw->mac.san_mac_rar_index)
                hw->mac.ops.clear_rar(hw, rar);
+
        return 0;
 }
 
index 0d7209e..9547191 100644 (file)
@@ -193,7 +193,9 @@ static int ixgbe_get_settings(struct net_device *netdev,
        if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
                ecmd->supported |= ixgbe_get_supported_10gtypes(hw);
        if (supported_link & IXGBE_LINK_SPEED_1GB_FULL)
-               ecmd->supported |= SUPPORTED_1000baseT_Full;
+               ecmd->supported |= (ixgbe_isbackplane(hw->phy.media_type)) ?
+                                  SUPPORTED_1000baseKX_Full :
+                                  SUPPORTED_1000baseT_Full;
        if (supported_link & IXGBE_LINK_SPEED_100_FULL)
                ecmd->supported |= ixgbe_isbackplane(hw->phy.media_type) ?
                                   SUPPORTED_1000baseKX_Full :
index b4f0374..d76bc1a 100644 (file)
@@ -137,6 +137,7 @@ static const struct pci_device_id ixgbe_pci_tbl[] = {
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a },
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a },
+       {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a},
        {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a },
        /* required last entry */
        {0, }
@@ -1103,7 +1104,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
 
        /* Do the reset outside of interrupt context */
        if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
-               adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+               set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
                e_warn(drv, "initiating reset due to tx timeout\n");
                ixgbe_service_event_schedule(adapter);
        }
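Replacing the flags2 bit with an adapter->state bit is a small concurrency hardening: flags2 updates are plain read-modify-writes, so a reset request raised from one context could be lost to a concurrent update, whereas set_bit()/test_and_clear_bit() are atomic. The pattern, sketched with a hypothetical bit name:

    unsigned long state = 0;

    /* producer (e.g. a timeout or interrupt path) */
    set_bit(__EXAMPLE_RESET_REQUESTED, &state);

    /* consumer (service task): test and consume in one atomic step */
    if (test_and_clear_bit(__EXAMPLE_RESET_REQUESTED, &state))
            example_do_reset();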
@@ -1495,7 +1496,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring,
                                     struct sk_buff *skb)
 {
        __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
-       __le16 hdr_info = rx_desc->wb.lower.lo_dword.hs_rss.hdr_info;
        bool encap_pkt = false;
 
        skb_checksum_none_assert(skb);
@@ -1504,8 +1504,8 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring,
        if (!(ring->netdev->features & NETIF_F_RXCSUM))
                return;
 
-       if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) &&
-           (hdr_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_TUNNEL >> 16))) {
+       /* check for VXLAN and Geneve packets */
+       if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
                encap_pkt = true;
                skb->encapsulation = 1;
        }
@@ -2777,7 +2777,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
                }
                if (eicr & IXGBE_EICR_ECC) {
                        e_info(link, "Received ECC Err, initiating reset\n");
-                       adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+                       set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
                        ixgbe_service_event_schedule(adapter);
                        IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
                }
@@ -3007,7 +3007,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
        case ixgbe_mac_x550em_a:
                if (eicr & IXGBE_EICR_ECC) {
                        e_info(link, "Received ECC Err, initiating reset\n");
-                       adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+                       set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
                        ixgbe_service_event_schedule(adapter);
                        IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
                }
@@ -3224,7 +3224,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
        } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
        if (!wait_loop)
-               e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+               hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
 }
 
 static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3922,6 +3922,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
        rfctl &= ~IXGBE_RFCTL_RSC_DIS;
        if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))
                rfctl |= IXGBE_RFCTL_RSC_DIS;
+
+       /* disable NFS filtering */
+       rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS);
        IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
 
        /* Program registers for the distribution of queues */
@@ -4586,18 +4589,23 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
        }
 }
 
-static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter)
+static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask)
 {
-       switch (adapter->hw.mac.type) {
-       case ixgbe_mac_X550:
-       case ixgbe_mac_X550EM_x:
-       case ixgbe_mac_x550em_a:
-               IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0);
+       struct ixgbe_hw *hw = &adapter->hw;
+       u32 vxlanctrl;
+
+       if (!(adapter->flags & (IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE |
+                               IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)))
+               return;
+
+       vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) & ~mask;
+       IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl);
+
+       if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK)
                adapter->vxlan_port = 0;
-               break;
-       default:
-               break;
-       }
+
+       if (mask & IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK)
+               adapter->geneve_port = 0;
 }
 
 #ifdef CONFIG_IXGBE_DCB
@@ -5500,8 +5508,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
        ixgbe_napi_disable_all(adapter);
 
-       adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
-                            IXGBE_FLAG2_RESET_REQUESTED);
+       clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+       adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
        adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
 
        del_timer_sync(&adapter->service_timer);
@@ -5711,8 +5719,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
                if (fwsm & IXGBE_FWSM_TS_ENABLED)
                        adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
                break;
-       case ixgbe_mac_X550EM_x:
        case ixgbe_mac_x550em_a:
+               adapter->flags |= IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE;
+       /* fall through */
+       case ixgbe_mac_X550EM_x:
 #ifdef CONFIG_IXGBE_DCB
                adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE;
 #endif
@@ -6144,7 +6154,7 @@ int ixgbe_open(struct net_device *netdev)
 
        ixgbe_up_complete(adapter);
 
-       ixgbe_clear_vxlan_port(adapter);
+       ixgbe_clear_udp_tunnel_port(adapter, IXGBE_VXLANCTRL_ALL_UDPPORT_MASK);
        udp_tunnel_get_rx_info(netdev);
 
        return 0;
@@ -6921,7 +6931,7 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
                         * (Do the reset outside of interrupt context).
                         */
                        e_warn(drv, "initiating reset to clear Tx work after link loss\n");
-                       adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+                       set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
                }
        }
 }
@@ -7187,11 +7197,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter)
 
 static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
 {
-       if (!(adapter->flags2 & IXGBE_FLAG2_RESET_REQUESTED))
+       if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state))
                return;
 
-       adapter->flags2 &= ~IXGBE_FLAG2_RESET_REQUESTED;
-
        /* If we're already down, removing or resetting, just bail */
        if (test_bit(__IXGBE_DOWN, &adapter->state) ||
            test_bit(__IXGBE_REMOVING, &adapter->state) ||
@@ -7225,9 +7233,9 @@ static void ixgbe_service_task(struct work_struct *work)
                ixgbe_service_event_complete(adapter);
                return;
        }
-       if (adapter->flags2 & IXGBE_FLAG2_VXLAN_REREG_NEEDED) {
+       if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) {
                rtnl_lock();
-               adapter->flags2 &= ~IXGBE_FLAG2_VXLAN_REREG_NEEDED;
+               adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
                udp_tunnel_get_rx_info(adapter->netdev);
                rtnl_unlock();
        }
@@ -7667,6 +7675,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
                if (adapter->vxlan_port &&
                    udp_hdr(skb)->dest == adapter->vxlan_port)
                        hdr.network = skb_inner_network_header(skb);
+
+               if (adapter->geneve_port &&
+                   udp_hdr(skb)->dest == adapter->geneve_port)
+                       hdr.network = skb_inner_network_header(skb);
        }
 
        /* Currently only IPv4/IPv6 with TCP is supported */
@@ -8802,10 +8814,23 @@ static int ixgbe_set_features(struct net_device *netdev,
        netdev->features = features;
 
        if ((adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) {
-               if (features & NETIF_F_RXCSUM)
-                       adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED;
-               else
-                       ixgbe_clear_vxlan_port(adapter);
+               if (features & NETIF_F_RXCSUM) {
+                       adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
+               } else {
+                       u32 port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK;
+
+                       ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+               }
+       }
+
+       if ((adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) {
+               if (features & NETIF_F_RXCSUM) {
+                       adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
+               } else {
+                       u32 port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK;
+
+                       ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+               }
        }
 
        if (need_reset)
@@ -8818,67 +8843,115 @@ static int ixgbe_set_features(struct net_device *netdev,
 }
 
 /**
- * ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up
+ * ixgbe_add_udp_tunnel_port - Get notifications about adding UDP tunnel ports
  * @dev: The port's netdev
  * @ti: Tunnel endpoint information
  **/
-static void ixgbe_add_vxlan_port(struct net_device *dev,
-                                struct udp_tunnel_info *ti)
+static void ixgbe_add_udp_tunnel_port(struct net_device *dev,
+                                     struct udp_tunnel_info *ti)
 {
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct ixgbe_hw *hw = &adapter->hw;
        __be16 port = ti->port;
-
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-               return;
+       u32 port_shift = 0;
+       u32 reg;
 
        if (ti->sa_family != AF_INET)
                return;
 
-       if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
-               return;
+       switch (ti->type) {
+       case UDP_TUNNEL_TYPE_VXLAN:
+               if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
+                       return;
 
-       if (adapter->vxlan_port == port)
-               return;
+               if (adapter->vxlan_port == port)
+                       return;
+
+               if (adapter->vxlan_port) {
+                       netdev_info(dev,
+                                   "VXLAN port %d set, not adding port %d\n",
+                                   ntohs(adapter->vxlan_port),
+                                   ntohs(port));
+                       return;
+               }
+
+               adapter->vxlan_port = port;
+               break;
+       case UDP_TUNNEL_TYPE_GENEVE:
+               if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))
+                       return;
+
+               if (adapter->geneve_port == port)
+                       return;
+
+               if (adapter->geneve_port) {
+                       netdev_info(dev,
+                                   "GENEVE port %d set, not adding port %d\n",
+                                   ntohs(adapter->geneve_port),
+                                   ntohs(port));
+                       return;
+               }
 
-       if (adapter->vxlan_port) {
-               netdev_info(dev,
-                           "Hit Max num of VXLAN ports, not adding port %d\n",
-                           ntohs(port));
+               port_shift = IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT;
+               adapter->geneve_port = port;
+               break;
+       default:
                return;
        }
 
-       adapter->vxlan_port = port;
-       IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port));
+       reg = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) | ntohs(port) << port_shift;
+       IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, reg);
 }
 
 /**
- * ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away
+ * ixgbe_del_udp_tunnel_port - Get notifications about removing UDP tunnel ports
  * @dev: The port's netdev
  * @ti: Tunnel endpoint information
  **/
-static void ixgbe_del_vxlan_port(struct net_device *dev,
-                                struct udp_tunnel_info *ti)
+static void ixgbe_del_udp_tunnel_port(struct net_device *dev,
+                                     struct udp_tunnel_info *ti)
 {
        struct ixgbe_adapter *adapter = netdev_priv(dev);
+       u32 port_mask;
 
-       if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+       if (ti->type != UDP_TUNNEL_TYPE_VXLAN &&
+           ti->type != UDP_TUNNEL_TYPE_GENEVE)
                return;
 
        if (ti->sa_family != AF_INET)
                return;
 
-       if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
-               return;
+       switch (ti->type) {
+       case UDP_TUNNEL_TYPE_VXLAN:
+               if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
+                       return;
 
-       if (adapter->vxlan_port != ti->port) {
-               netdev_info(dev, "Port %d was not found, not deleting\n",
-                           ntohs(ti->port));
+               if (adapter->vxlan_port != ti->port) {
+                       netdev_info(dev, "VXLAN port %d not found\n",
+                                   ntohs(ti->port));
+                       return;
+               }
+
+               port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK;
+               break;
+       case UDP_TUNNEL_TYPE_GENEVE:
+               if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))
+                       return;
+
+               if (adapter->geneve_port != ti->port) {
+                       netdev_info(dev, "GENEVE port %d not found\n",
+                                   ntohs(ti->port));
+                       return;
+               }
+
+               port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK;
+               break;
+       default:
                return;
        }
 
-       ixgbe_clear_vxlan_port(adapter);
-       adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED;
+       ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+       adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
 }
 
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
@@ -9192,8 +9265,8 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_bridge_getlink     = ixgbe_ndo_bridge_getlink,
        .ndo_dfwd_add_station   = ixgbe_fwd_add,
        .ndo_dfwd_del_station   = ixgbe_fwd_del,
-       .ndo_udp_tunnel_add     = ixgbe_add_vxlan_port,
-       .ndo_udp_tunnel_del     = ixgbe_del_vxlan_port,
+       .ndo_udp_tunnel_add     = ixgbe_add_udp_tunnel_port,
+       .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
        .ndo_features_check     = ixgbe_features_check,
 };
 
index 1248a99..31d82e3 100644 (file)
@@ -90,6 +90,7 @@
 #define IXGBE_DEV_ID_X550EM_A_SFP_N    0x15C4
 #define IXGBE_DEV_ID_X550EM_A_SGMII    0x15C6
 #define IXGBE_DEV_ID_X550EM_A_SGMII_L  0x15C7
+#define IXGBE_DEV_ID_X550EM_A_10G_T    0x15C8
 #define IXGBE_DEV_ID_X550EM_A_SFP      0x15CE
 
 /* VF Device IDs */
@@ -487,6 +488,13 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_FHFT_EXT(_n)     (0x09800 + ((_n) * 0x100)) /* Ext Flexible Host
                                                            * Filter Table */
 
+/* masks for accessing VXLAN and GENEVE UDP ports */
+#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK     0x0000ffff /* VXLAN port */
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK    0xffff0000 /* GENEVE port */
+#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK       0xffffffff /* GENEVE/VXLAN */
+
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT   16
+
 #define IXGBE_FLEXIBLE_FILTER_COUNT_MAX         4
 #define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX     2
 
@@ -1823,6 +1831,9 @@ enum {
 #define IXGBE_LED_IVRT(_i)       IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
 #define IXGBE_LED_BLINK(_i)      IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
 #define IXGBE_LED_MODE_MASK(_i)  IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+#define IXGBE_X557_LED_MANUAL_SET_MASK BIT(8)
+#define IXGBE_X557_MAX_LED_INDEX       3
+#define IXGBE_X557_LED_PROVISIONING    0xC430
 
 /* LED modes */
 #define IXGBE_LED_LINK_UP       0x0
index 4716ca4..e092a89 100644 (file)
@@ -295,6 +295,12 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_X550EM_A_KR_L:
                hw->phy.type = ixgbe_phy_x550em_kr;
                break;
+       case IXGBE_DEV_ID_X550EM_A_10G_T:
+               if (hw->bus.lan_id)
+                       hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+               else
+                       hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+               /* Fallthrough */
        case IXGBE_DEV_ID_X550EM_X_1G_T:
        case IXGBE_DEV_ID_X550EM_X_10G_T:
                return ixgbe_identify_phy_generic(hw);
@@ -2114,6 +2120,50 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
        return ixgbe_enable_lasi_ext_t_x550em(hw);
 }
 
+/**
+ *  ixgbe_led_on_t_x550em - Turns on the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn on
+ **/
+s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+       u16 phy_data;
+
+       if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+               return IXGBE_ERR_PARAM;
+
+       /* To turn on the LED, set mode to ON. */
+       hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+                            IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+       phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
+       hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+                             IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+       return 0;
+}
+
+/**
+ *  ixgbe_led_off_t_x550em - Turns off the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn off
+ **/
+s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+       u16 phy_data;
+
+       if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+               return IXGBE_ERR_PARAM;
+
+       /* To turn off the LED, set mode to OFF. */
+       hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+                            IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+       phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
+       hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+                             IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+       return 0;
+}
+
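
The on and off helpers above are the same read-modify-write on the X557 LED provisioning register and differ only in setting versus clearing IXGBE_X557_LED_MANUAL_SET_MASK; a hypothetical merged form (a sketch, not what the driver ships) would look like:

	static s32 ixgbe_led_set_t_x550em(struct ixgbe_hw *hw, u32 led_idx, bool on)
	{
		u16 phy_data;

		if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
			return IXGBE_ERR_PARAM;

		/* read-modify-write of the manual-set bit for this LED */
		hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
				     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
		if (on)
			phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
		else
			phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
		hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);

		return 0;
	}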
 /** ixgbe_get_lcd_x550em - Determine lowest common denominator
  *  @hw: pointer to hardware structure
  *  @lcd_speed: pointer to lowest common link speed
@@ -2344,18 +2394,12 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
        /* If this is an X552 (X550EM_a) and MDIO is connected to an external
         * PHY, then set the PHY address. This register field has only been
         * used for X552.
         */
-       if (!hw->phy.nw_mng_if_sel) {
-               if (hw->mac.type == ixgbe_mac_x550em_a) {
-                       struct ixgbe_adapter *adapter = hw->back;
-
-                       e_warn(drv, "nw_mng_if_sel not set\n");
-               }
-               return;
+       if (hw->mac.type == ixgbe_mac_x550em_a &&
+           hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
+               hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
+                                     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+                                    IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
        }
-
-       hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
-                             IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
-                            IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
 }
 
 /** ixgbe_init_phy_ops_X550em - PHY/SFP specific init
@@ -2456,6 +2500,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
                break;
        case IXGBE_DEV_ID_X550EM_X_1G_T:
        case IXGBE_DEV_ID_X550EM_X_10G_T:
+       case IXGBE_DEV_ID_X550EM_A_10G_T:
                media_type = ixgbe_media_type_copper;
                break;
        default:
@@ -2514,6 +2559,9 @@ static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw)
 
        switch (hw->device_id) {
        case IXGBE_DEV_ID_X550EM_X_10G_T:
+       case IXGBE_DEV_ID_X550EM_A_SGMII:
+       case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+       case IXGBE_DEV_ID_X550EM_A_10G_T:
        case IXGBE_DEV_ID_X550EM_A_SFP:
                /* Config MDIO clock speed before the first MDIO PHY access */
                hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
@@ -2853,8 +2901,6 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
        .write_analog_reg8              = NULL, \
        .set_rxpba                      = &ixgbe_set_rxpba_generic, \
        .check_link                     = &ixgbe_check_mac_link_generic, \
-       .led_on                         = &ixgbe_led_on_generic, \
-       .led_off                        = &ixgbe_led_off_generic, \
        .blink_led_start                = &ixgbe_blink_led_start_X540, \
        .blink_led_stop                 = &ixgbe_blink_led_stop_X540, \
        .set_rar                        = &ixgbe_set_rar_generic, \
@@ -2886,6 +2932,8 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
 
 static const struct ixgbe_mac_operations mac_ops_X550 = {
        X550_COMMON_MAC
+       .led_on                 = ixgbe_led_on_generic,
+       .led_off                = ixgbe_led_off_generic,
        .reset_hw               = &ixgbe_reset_hw_X540,
        .get_media_type         = &ixgbe_get_media_type_X540,
        .get_san_mac_addr       = &ixgbe_get_san_mac_addr_generic,
@@ -2904,6 +2952,8 @@ static const struct ixgbe_mac_operations mac_ops_X550 = {
 
 static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
        X550_COMMON_MAC
+       .led_on                 = ixgbe_led_on_t_x550em,
+       .led_off                = ixgbe_led_off_t_x550em,
        .reset_hw               = &ixgbe_reset_hw_X550em,
        .get_media_type         = &ixgbe_get_media_type_X550em,
        .get_san_mac_addr       = NULL,
@@ -2922,6 +2972,8 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
 
 static struct ixgbe_mac_operations mac_ops_x550em_a = {
        X550_COMMON_MAC
+       .led_on                 = ixgbe_led_on_t_x550em,
+       .led_off                = ixgbe_led_off_t_x550em,
        .reset_hw               = ixgbe_reset_hw_X550em,
        .get_media_type         = ixgbe_get_media_type_X550em,
        .get_san_mac_addr       = NULL,
index be52f59..5639fbe 100644 (file)
@@ -502,12 +502,9 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
 
-#ifdef DEBUG
-char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw);
-#define hw_dbg(hw, format, arg...) \
-       printk(KERN_DEBUG "%s: " format, ixgbevf_get_hw_dev_name(hw), ##arg)
-#else
-#define hw_dbg(hw, format, arg...) do {} while (0)
-#endif
+#define ixgbevf_hw_to_netdev(hw) \
+       (((struct ixgbevf_adapter *)(hw)->back)->netdev)
 
+#define hw_dbg(hw, format, arg...) \
+       netdev_dbg(ixgbevf_hw_to_netdev(hw), format, ## arg)
 #endif /* _IXGBEVF_H_ */
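
With this definition hw_dbg() is always compiled in and routed through netdev_dbg(), so output is gated at runtime by dynamic debug rather than by a DEBUG build, and each message carries the netdev name. Call sites need no change, as in the conversion below:

	hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);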
index d9d6616..4044608 100644 (file)
@@ -1612,7 +1612,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter,
                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(reg_idx));
        }  while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
        if (!wait_loop)
-               pr_err("Could not enable Tx Queue %d\n", reg_idx);
+               hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
 }
 
 /**
@@ -2993,6 +2993,7 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
  **/
 int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
 {
+       struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
        int size;
 
        size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
index a52f70e..d46ba1d 100644 (file)
@@ -284,7 +284,8 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
        if (addr)
                ether_addr_copy(msg_addr, addr);
 
-       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3);
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+                                            sizeof(msgbuf) / sizeof(u32));
        if (!ret_val) {
                msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -441,7 +442,8 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
        msgbuf[0] = IXGBE_VF_SET_MAC_ADDR;
        ether_addr_copy(msg_addr, addr);
 
-       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+                                            sizeof(msgbuf) / sizeof(u32));
 
        msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
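
Deriving the mailbox length from the buffer instead of hard-coding it keeps the two in sync if a message ever grows; sizeof(msgbuf) / sizeof(u32) is simply the element count, the same quantity the kernel's ARRAY_SIZE() computes. A userspace check, with the typedef as a local stand-in:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t u32;
	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	int main(void)
	{
		u32 msgbuf[3];

		/* both expressions yield the element count: prints "3 3" */
		printf("%zu %zu\n", sizeof(msgbuf) / sizeof(u32), ARRAY_SIZE(msgbuf));
		return 0;
	}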
 
@@ -551,7 +553,8 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
        msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE;
        msgbuf[1] = xcast_mode;
 
-       err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+                                        sizeof(msgbuf) / sizeof(u32));
        if (err)
                return err;
 
@@ -588,7 +591,8 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
        /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */
        msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
 
-       err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+                                        sizeof(msgbuf) / sizeof(u32));
        if (err)
                goto mbx_err;
 
@@ -791,7 +795,8 @@ static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
        msgbuf[0] = IXGBE_VF_SET_LPE;
        msgbuf[1] = max_size;
 
-       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+       ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+                                            sizeof(msgbuf) / sizeof(u32));
        if (ret_val)
                return ret_val;
        if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
@@ -837,7 +842,8 @@ static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
        msg[1] = api;
        msg[2] = 0;
 
-       err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3);
+       err = ixgbevf_write_msg_read_ack(hw, msg, msg,
+                                        sizeof(msg) / sizeof(u32));
        if (!err) {
                msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -887,7 +893,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
        msg[0] = IXGBE_VF_GET_QUEUE;
        msg[1] = msg[2] = msg[3] = msg[4] = 0;
 
-       err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5);
+       err = ixgbevf_write_msg_read_ack(hw, msg, msg,
+                                        sizeof(msg) / sizeof(u32));
        if (!err) {
                msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
index e74fd44..a32de43 100644 (file)
@@ -133,7 +133,7 @@ struct mvneta_bm_pool {
 void *mvneta_frag_alloc(unsigned int frag_size);
 void mvneta_frag_free(unsigned int frag_size, void *data);
 
-#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE)
+#if IS_ENABLED(CONFIG_MVNETA_BM)
 void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
                            struct mvneta_bm_pool *bm_pool, u8 port_map);
 void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
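
IS_ENABLED(CONFIG_MVNETA_BM) from <linux/kconfig.h> is true for both built-in (=y) and modular (=m) configurations, collapsing the open-coded pair of defined() tests. A simplified standalone re-derivation of the preprocessor trick behind it, hand-defining the _MODULE symbol to play the part of Kconfig:

	#include <stdio.h>

	/* a defined-to-1 symbol expands __ARG_PLACEHOLDER_1 into "0,", adding an
	 * argument so the second-argument probe picks 1 instead of the 0 fallback
	 */
	#define __ARG_PLACEHOLDER_1 0,
	#define __take_second_arg(ignored, val, ...) val
	#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
	#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
	#define __is_defined(x) ___is_defined(x)
	#define IS_ENABLED(option) (__is_defined(option) || __is_defined(option##_MODULE))

	#define CONFIG_MVNETA_BM_MODULE 1	/* pretend the driver is =m */

	int main(void)
	{
		printf("%d\n", IS_ENABLED(CONFIG_MVNETA_BM));	/* prints 1 */
		return 0;
	}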
index 467138b..f05ea56 100644 (file)
@@ -3070,7 +3070,7 @@ static int sky2_poll(struct napi_struct *napi, int work_limit)
                        goto done;
        }
 
-       napi_complete(napi);
+       napi_complete_done(napi, work_done);
        sky2_read32(hw, B0_Y2_SP_LISR);
 done:
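
napi_complete_done() passes the amount of work actually completed to the core, information that plain napi_complete() throws away and that the core can use for busy-poll accounting and related heuristics. The generic poll shape is sketched below; do_rx_work() and enable_rx_irq() are hypothetical driver hooks, not sky2 functions:

	static int example_poll(struct napi_struct *napi, int budget)
	{
		int work_done = do_rx_work(napi, budget);	/* hypothetical */

		if (work_done < budget) {
			napi_complete_done(napi, work_done);	/* report the count */
			enable_rx_irq(napi);			/* hypothetical */
		}

		return work_done;
	}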
 
index 1801fd8..522fe8d 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/clk.h>
+#include <linux/pm_runtime.h>
 #include <linux/if_vlan.h>
 #include <linux/reset.h>
 #include <linux/tcp.h>
@@ -50,6 +51,10 @@ static const struct mtk_ethtool_stats {
        MTK_ETHTOOL_STAT(rx_flow_control_packets),
 };
 
+static const char * const mtk_clks_source_name[] = {
+       "ethif", "esw", "gp1", "gp2"
+};
+
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
 {
        __raw_writel(val, eth->base + reg);
@@ -140,6 +145,9 @@ static void mtk_phy_link_adjust(struct net_device *dev)
                  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
                  MAC_MCR_BACKPR_EN;
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return;
+
        switch (mac->phy_dev->speed) {
        case SPEED_1000:
                mcr |= MAC_MCR_SPEED_1000;
@@ -226,7 +234,7 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 {
        struct mtk_eth *eth = mac->hw;
        struct device_node *np;
-       u32 val, ge_mode;
+       u32 val;
 
        np = of_parse_phandle(mac->of_node, "phy-handle", 0);
        if (!np && of_phy_is_fixed_link(mac->of_node))
@@ -240,18 +248,18 @@ static int mtk_phy_connect(struct mtk_mac *mac)
        case PHY_INTERFACE_MODE_RGMII_RXID:
        case PHY_INTERFACE_MODE_RGMII_ID:
        case PHY_INTERFACE_MODE_RGMII:
-               ge_mode = 0;
+               mac->ge_mode = 0;
                break;
        case PHY_INTERFACE_MODE_MII:
-               ge_mode = 1;
+               mac->ge_mode = 1;
                break;
        case PHY_INTERFACE_MODE_REVMII:
-               ge_mode = 2;
+               mac->ge_mode = 2;
                break;
        case PHY_INTERFACE_MODE_RMII:
                if (!mac->id)
                        goto err_phy;
-               ge_mode = 3;
+               mac->ge_mode = 3;
                break;
        default:
                goto err_phy;
@@ -260,7 +268,7 @@ static int mtk_phy_connect(struct mtk_mac *mac)
        /* put the gmac into the right mode */
        regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
        val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
-       val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+       val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
        regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
        mtk_phy_connect_node(eth, mac, np);
@@ -291,7 +299,7 @@ err_phy:
 static int mtk_mdio_init(struct mtk_eth *eth)
 {
        struct device_node *mii_np;
-       int err;
+       int ret;
 
        mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
        if (!mii_np) {
@@ -300,13 +308,13 @@ static int mtk_mdio_init(struct mtk_eth *eth)
        }
 
        if (!of_device_is_available(mii_np)) {
-               err = 0;
+               ret = -ENODEV;
                goto err_put_node;
        }
 
-       eth->mii_bus = mdiobus_alloc();
+       eth->mii_bus = devm_mdiobus_alloc(eth->dev);
        if (!eth->mii_bus) {
-               err = -ENOMEM;
+               ret = -ENOMEM;
                goto err_put_node;
        }
 
@@ -317,19 +325,11 @@ static int mtk_mdio_init(struct mtk_eth *eth)
        eth->mii_bus->parent = eth->dev;
 
        snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
-       err = of_mdiobus_register(eth->mii_bus, mii_np);
-       if (err)
-               goto err_free_bus;
-
-       return 0;
-
-err_free_bus:
-       mdiobus_free(eth->mii_bus);
+       ret = of_mdiobus_register(eth->mii_bus, mii_np);
 
 err_put_node:
        of_node_put(mii_np);
-       eth->mii_bus = NULL;
-       return err;
+       return ret;
 }
 
 static void mtk_mdio_cleanup(struct mtk_eth *eth)
@@ -338,29 +338,29 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth)
                return;
 
        mdiobus_unregister(eth->mii_bus);
-       of_node_put(eth->mii_bus->dev.of_node);
-       mdiobus_free(eth->mii_bus);
 }
 
-static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_disable(struct mtk_eth *eth,
+                                  unsigned reg, u32 mask)
 {
        unsigned long flags;
        u32 val;
 
        spin_lock_irqsave(&eth->irq_lock, flags);
-       val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-       mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+       val = mtk_r32(eth, reg);
+       mtk_w32(eth, val & ~mask, reg);
        spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
-static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_enable(struct mtk_eth *eth,
+                                 unsigned reg, u32 mask)
 {
        unsigned long flags;
        u32 val;
 
        spin_lock_irqsave(&eth->irq_lock, flags);
-       val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-       mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+       val = mtk_r32(eth, reg);
+       mtk_w32(eth, val | mask, reg);
        spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
@@ -373,6 +373,9 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
        if (ret)
                return ret;
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return -EBUSY;
+
        spin_lock_bh(&mac->hw->page_lock);
        mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
                MTK_GDMA_MAC_ADRH(mac->id));
@@ -587,14 +590,15 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
        dma_addr_t mapped_addr;
        unsigned int nr_frags;
        int i, n_desc = 1;
-       u32 txd4 = 0;
+       u32 txd4 = 0, fport;
 
        itxd = ring->next_free;
        if (itxd == ring->last_free)
                return -ENOMEM;
 
        /* set the forward port */
-       txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+       fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+       txd4 |= fport;
 
        tx_buf = mtk_desc_to_tx_buf(ring, itxd);
        memset(tx_buf, 0, sizeof(*tx_buf));
@@ -652,7 +656,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
                        WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
                                               TX_DMA_PLEN0(frag_map_size) |
                                               last_frag * TX_DMA_LS0));
-                       WRITE_ONCE(txd->txd4, 0);
+                       WRITE_ONCE(txd->txd4, fport);
 
                        tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
                        tx_buf = mtk_desc_to_tx_buf(ring, txd);
@@ -772,6 +776,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        spin_lock(&eth->page_lock);
 
+       if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+               goto drop;
+
        tx_num = mtk_cal_txd_req(skb);
        if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
                mtk_stop_queue(eth);
@@ -844,6 +851,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 
                netdev = eth->netdev[mac];
 
+               if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+                       goto release_desc;
+
                /* alloc new buffer */
                new_data = napi_alloc_frag(ring->frag_size);
                if (unlikely(!new_data)) {
@@ -863,7 +873,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
                /* receive data */
                skb = build_skb(data, ring->frag_size);
                if (unlikely(!skb)) {
-                       put_page(virt_to_head_page(new_data));
+                       skb_free_frag(new_data);
                        netdev->stats.rx_dropped++;
                        goto release_desc;
                }
@@ -893,17 +903,18 @@ release_desc:
                rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
 
                ring->calc_idx = idx;
+
+               done++;
+       }
+
+       if (done) {
                /* make sure that all changes to the dma ring are flushed before
                 * we continue
                 */
                wmb();
-               mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
-               done++;
+               mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0);
        }
 
-       if (done < budget)
-               mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
-
        return done;
 }
 
@@ -1012,7 +1023,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
                return budget;
 
        napi_complete(napi);
-       mtk_irq_enable(eth, MTK_TX_DONE_INT);
+       mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 
        return tx_done;
 }
@@ -1022,30 +1033,33 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget)
        struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
        u32 status, mask;
        int rx_done = 0;
+       int remain_budget = budget;
 
        mtk_handle_status_irq(eth);
-       mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
-       rx_done = mtk_poll_rx(napi, budget, eth);
+
+poll_again:
+       mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
+       rx_done = mtk_poll_rx(napi, remain_budget, eth);
 
        if (unlikely(netif_msg_intr(eth))) {
-               status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-               mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+               status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+               mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
                dev_info(eth->dev,
                         "done rx %d, intr 0x%08x/0x%x\n",
                         rx_done, status, mask);
        }
-
-       if (rx_done == budget)
-               return budget;
-
-       status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-       if (status & MTK_RX_DONE_INT)
+       if (rx_done == remain_budget)
                return budget;
 
+       status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+       if (status & MTK_RX_DONE_INT) {
+               remain_budget -= rx_done;
+               goto poll_again;
+       }
        napi_complete(napi);
-       mtk_irq_enable(eth, MTK_RX_DONE_INT);
+       mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 
-       return rx_done;
+       return rx_done + budget - remain_budget;
 }
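
The rewritten handler keeps polling as long as the PDMA status still reports RX work and budget remains, instead of completing and waiting to be rescheduled, so the return value has to account for every pass. A userspace model of just that budget arithmetic, with fake per-pass completions standing in for the hardware:

	#include <stdio.h>

	static int fake_done[] = { 3, 2 };	/* completions per poll pass */
	static int pass;

	static int poll_rx(int budget)
	{
		int done = pass < 2 ? fake_done[pass++] : 0;

		return done < budget ? done : budget;
	}

	static int status_pending(void)
	{
		return pass < 2;		/* fake MTK_RX_DONE_INT check */
	}

	int main(void)
	{
		int budget = 8, remain = budget, done;

	poll_again:
		done = poll_rx(remain);
		if (done == remain) {		/* budget exhausted: stay scheduled */
			printf("return %d\n", budget);
			return 0;
		}
		if (status_pending()) {
			remain -= done;
			goto poll_again;
		}
		/* everything consumed across the passes: prints "return 5" */
		printf("return %d\n", done + budget - remain);
		return 0;
	}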
 
 static int mtk_tx_alloc(struct mtk_eth *eth)
@@ -1092,6 +1106,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
        mtk_w32(eth,
                ring->phys + ((MTK_DMA_SIZE - 1) * sz),
                MTK_QTX_DRX_PTR);
+       mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
 
        return 0;
 
@@ -1162,11 +1177,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth)
         */
        wmb();
 
-       mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
-       mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
-       mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
-       mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
-       mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+       mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0);
+       mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0);
+       mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0);
+       mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX);
 
        return 0;
 }
@@ -1285,7 +1299,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth)
 
        if (likely(napi_schedule_prep(&eth->rx_napi))) {
                __napi_schedule(&eth->rx_napi);
-               mtk_irq_disable(eth, MTK_RX_DONE_INT);
+               mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        }
 
        return IRQ_HANDLED;
@@ -1297,7 +1311,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
 
        if (likely(napi_schedule_prep(&eth->tx_napi))) {
                __napi_schedule(&eth->tx_napi);
-               mtk_irq_disable(eth, MTK_TX_DONE_INT);
+               mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
        }
 
        return IRQ_HANDLED;
@@ -1308,11 +1322,12 @@ static void mtk_poll_controller(struct net_device *dev)
 {
        struct mtk_mac *mac = netdev_priv(dev);
        struct mtk_eth *eth = mac->hw;
-       u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
 
-       mtk_irq_disable(eth, int_mask);
+       mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+       mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        mtk_handle_irq_rx(eth->irq[2], dev);
-       mtk_irq_enable(eth, int_mask);
+       mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+       mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 }
 #endif
 
@@ -1327,11 +1342,15 @@ static int mtk_start_dma(struct mtk_eth *eth)
        }
 
        mtk_w32(eth,
-               MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
-               MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-               MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
+               MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+               MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
                MTK_QDMA_GLO_CFG);
 
+       mtk_w32(eth,
+               MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+               MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+               MTK_PDMA_GLO_CFG);
+
        return 0;
 }
 
@@ -1349,7 +1368,8 @@ static int mtk_open(struct net_device *dev)
 
                napi_enable(&eth->tx_napi);
                napi_enable(&eth->rx_napi);
-               mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+               mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+               mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        }
        atomic_inc(&eth->dma_refcnt);
 
@@ -1394,7 +1414,8 @@ static int mtk_stop(struct net_device *dev)
        if (!atomic_dec_and_test(&eth->dma_refcnt))
                return 0;
 
-       mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+       mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+       mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        napi_disable(&eth->tx_napi);
        napi_disable(&eth->rx_napi);
 
@@ -1405,15 +1426,44 @@ static int mtk_stop(struct net_device *dev)
        return 0;
 }
 
-static int __init mtk_hw_init(struct mtk_eth *eth)
+static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
 {
-       int err, i;
+       regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+                          reset_bits,
+                          reset_bits);
 
-       /* reset the frame engine */
-       reset_control_assert(eth->rstc);
-       usleep_range(10, 20);
-       reset_control_deassert(eth->rstc);
-       usleep_range(10, 20);
+       usleep_range(1000, 1100);
+       regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+                          reset_bits,
+                          ~reset_bits);
+       mdelay(10);
+}
+
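
ethsys_reset() pulses the given reset line: regmap_update_bits() computes new = (old & ~mask) | (val & mask), so the first call sets reset_bits and the second, passing ~reset_bits as val, clears exactly the masked bits. A userspace model of that arithmetic:

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t update_bits(uint32_t old, uint32_t mask, uint32_t val)
	{
		return (old & ~mask) | (val & mask);	/* regmap_update_bits() semantics */
	}

	int main(void)
	{
		uint32_t reg = 0, fe = 1u << 6;		/* RSTCTRL_FE */

		reg = update_bits(reg, fe, fe);		/* assert reset */
		printf("0x%08" PRIx32 "\n", reg);	/* 0x00000040 */
		reg = update_bits(reg, fe, ~fe);	/* deassert: val is masked to 0 */
		printf("0x%08" PRIx32 "\n", reg);	/* 0x00000000 */
		return 0;
	}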
+static int mtk_hw_init(struct mtk_eth *eth)
+{
+       int i, val;
+
+       if (test_and_set_bit(MTK_HW_INIT, &eth->state))
+               return 0;
+
+       pm_runtime_enable(eth->dev);
+       pm_runtime_get_sync(eth->dev);
+
+       clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
+       clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
+       clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
+       clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+       ethsys_reset(eth, RSTCTRL_FE);
+       ethsys_reset(eth, RSTCTRL_PPE);
+
+       regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+       for (i = 0; i < MTK_MAC_COUNT; i++) {
+               if (!eth->mac[i])
+                       continue;
+               val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id);
+               val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id);
+       }
+       regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
        /* Set GE2 driving and slew rate */
        regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
@@ -1433,22 +1483,11 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
        /* Enable RX VLan Offloading */
        mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
 
-       err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
-                              dev_name(eth->dev), eth);
-       if (err)
-               return err;
-       err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
-                              dev_name(eth->dev), eth);
-       if (err)
-               return err;
-
-       err = mtk_mdio_init(eth);
-       if (err)
-               return err;
-
        /* disable delay and normal interrupt */
        mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-       mtk_irq_disable(eth, ~0);
+       mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
+       mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+       mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
        mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
        mtk_w32(eth, 0, MTK_RST_GL);
 
@@ -1462,9 +1501,8 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
        for (i = 0; i < 2; i++) {
                u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
 
-               /* setup the forward port to send frame to QDMA */
+               /* set up the forward port to send frames to PDMA */
                val &= ~0xffff;
-               val |= 0x5555;
 
                /* Enable RX checksum */
                val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
@@ -1476,6 +1514,22 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
        return 0;
 }
 
+static int mtk_hw_deinit(struct mtk_eth *eth)
+{
+       if (!test_and_clear_bit(MTK_HW_INIT, &eth->state))
+               return 0;
+
+       clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+
+       pm_runtime_put_sync(eth->dev);
+       pm_runtime_disable(eth->dev);
+
+       return 0;
+}
+
 static int __init mtk_init(struct net_device *dev)
 {
        struct mtk_mac *mac = netdev_priv(dev);
@@ -1504,7 +1558,8 @@ static void mtk_uninit(struct net_device *dev)
 
        phy_disconnect(mac->phy_dev);
        mtk_mdio_cleanup(eth);
-       mtk_irq_disable(eth, ~0);
+       mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+       mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
        free_irq(eth->irq[1], dev);
        free_irq(eth->irq[2], dev);
 }
@@ -1533,6 +1588,12 @@ static void mtk_pending_work(struct work_struct *work)
 
        rtnl_lock();
 
+       dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__);
+
+       while (test_and_set_bit_lock(MTK_RESETTING, &eth->state))
+               cpu_relax();
+
+       dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__);
        /* stop all devices to make sure that dma is properly shut down */
        for (i = 0; i < MTK_MAC_COUNT; i++) {
                if (!eth->netdev[i])
@@ -1540,6 +1601,27 @@ static void mtk_pending_work(struct work_struct *work)
                mtk_stop(eth->netdev[i]);
                __set_bit(i, &restart);
        }
+       dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__);
+
+       /* restart underlying hardware such as power, clock, pin mux
+        * and the connected phy
+        */
+       mtk_hw_deinit(eth);
+
+       if (eth->dev->pins)
+               pinctrl_select_state(eth->dev->pins->p,
+                                    eth->dev->pins->default_state);
+       mtk_hw_init(eth);
+
+       for (i = 0; i < MTK_MAC_COUNT; i++) {
+               if (!eth->mac[i] ||
+                   of_phy_is_fixed_link(eth->mac[i]->of_node))
+                       continue;
+               err = phy_init_hw(eth->mac[i]->phy_dev);
+               if (err)
+                       dev_err(eth->dev, "%s: PHY init failed.\n",
+                               eth->netdev[i]->name);
+       }
 
        /* restart DMA and enable IRQs */
        for (i = 0; i < MTK_MAC_COUNT; i++) {
@@ -1552,20 +1634,44 @@ static void mtk_pending_work(struct work_struct *work)
                        dev_close(eth->netdev[i]);
                }
        }
+
+       dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__);
+
+       clear_bit_unlock(MTK_RESETTING, &eth->state);
+
        rtnl_unlock();
 }
 
-static int mtk_cleanup(struct mtk_eth *eth)
+static int mtk_free_dev(struct mtk_eth *eth)
 {
        int i;
 
        for (i = 0; i < MTK_MAC_COUNT; i++) {
                if (!eth->netdev[i])
                        continue;
+               free_netdev(eth->netdev[i]);
+       }
+
+       return 0;
+}
+
+static int mtk_unreg_dev(struct mtk_eth *eth)
+{
+       int i;
 
+       for (i = 0; i < MTK_MAC_COUNT; i++) {
+               if (!eth->netdev[i])
+                       continue;
                unregister_netdev(eth->netdev[i]);
-               free_netdev(eth->netdev[i]);
        }
+
+       return 0;
+}
+
+static int mtk_cleanup(struct mtk_eth *eth)
+{
+       mtk_unreg_dev(eth);
+       mtk_free_dev(eth);
        cancel_work_sync(&eth->pending_work);
 
        return 0;
@@ -1577,6 +1683,9 @@ static int mtk_get_settings(struct net_device *dev,
        struct mtk_mac *mac = netdev_priv(dev);
        int err;
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return -EBUSY;
+
        err = phy_read_status(mac->phy_dev);
        if (err)
                return -ENODEV;
@@ -1627,6 +1736,9 @@ static int mtk_nway_reset(struct net_device *dev)
 {
        struct mtk_mac *mac = netdev_priv(dev);
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return -EBUSY;
+
        return genphy_restart_aneg(mac->phy_dev);
 }
 
@@ -1635,6 +1747,9 @@ static u32 mtk_get_link(struct net_device *dev)
        struct mtk_mac *mac = netdev_priv(dev);
        int err;
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return -EBUSY;
+
        err = genphy_update_link(mac->phy_dev);
        if (err)
                return ethtool_op_get_link(dev);
@@ -1675,6 +1790,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev,
        unsigned int start;
        int i;
 
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return;
+
        if (netif_running(dev) && netif_device_present(dev)) {
                if (spin_trylock(&hwstats->stats_lock)) {
                        mtk_stats_update_mac(mac);
@@ -1683,7 +1801,7 @@ static void mtk_get_ethtool_stats(struct net_device *dev,
        }
 
        do {
-               data_src = (u64*)hwstats;
+               data_src = (u64 *)hwstats;
                data_dst = data;
                start = u64_stats_fetch_begin_irq(&hwstats->syncp);
 
@@ -1692,7 +1810,7 @@ static void mtk_get_ethtool_stats(struct net_device *dev,
        } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
 }
 
-static struct ethtool_ops mtk_ethtool_ops = {
+static const struct ethtool_ops mtk_ethtool_ops = {
        .get_settings           = mtk_get_settings,
        .set_settings           = mtk_set_settings,
        .get_drvinfo            = mtk_get_drvinfo,
@@ -1776,16 +1894,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
        eth->netdev[id]->features |= MTK_HW_FEATURES;
        eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
 
-       err = register_netdev(eth->netdev[id]);
-       if (err) {
-               dev_err(eth->dev, "error bringing up device\n");
-               goto free_netdev;
-       }
        eth->netdev[id]->irq = eth->irq[0];
-       netif_info(eth, probe, eth->netdev[id],
-                  "mediatek frame engine at 0x%08lx, irq %d\n",
-                  eth->netdev[id]->base_addr, eth->irq[0]);
-
        return 0;
 
 free_netdev:
@@ -1810,6 +1919,7 @@ static int mtk_probe(struct platform_device *pdev)
        if (!eth)
                return -ENOMEM;
 
+       eth->dev = &pdev->dev;
        eth->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(eth->base))
                return PTR_ERR(eth->base);
@@ -1831,12 +1941,6 @@ static int mtk_probe(struct platform_device *pdev)
                return PTR_ERR(eth->pctl);
        }
 
-       eth->rstc = devm_reset_control_get(&pdev->dev, "eth");
-       if (IS_ERR(eth->rstc)) {
-               dev_err(&pdev->dev, "no eth reset found\n");
-               return PTR_ERR(eth->rstc);
-       }
-
        for (i = 0; i < 3; i++) {
                eth->irq[i] = platform_get_irq(pdev, i);
                if (eth->irq[i] < 0) {
@@ -1844,21 +1948,16 @@ static int mtk_probe(struct platform_device *pdev)
                        return -ENXIO;
                }
        }
+       for (i = 0; i < ARRAY_SIZE(eth->clks); i++) {
+               eth->clks[i] = devm_clk_get(eth->dev,
+                                           mtk_clks_source_name[i]);
+               if (IS_ERR(eth->clks[i])) {
+                       if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER)
+                               return -EPROBE_DEFER;
+                       return -ENODEV;
+               }
+       }
 
-       eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
-       eth->clk_esw = devm_clk_get(&pdev->dev, "esw");
-       eth->clk_gp1 = devm_clk_get(&pdev->dev, "gp1");
-       eth->clk_gp2 = devm_clk_get(&pdev->dev, "gp2");
-       if (IS_ERR(eth->clk_esw) || IS_ERR(eth->clk_gp1) ||
-           IS_ERR(eth->clk_gp2) || IS_ERR(eth->clk_ethif))
-               return -ENODEV;
-
-       clk_prepare_enable(eth->clk_ethif);
-       clk_prepare_enable(eth->clk_esw);
-       clk_prepare_enable(eth->clk_gp1);
-       clk_prepare_enable(eth->clk_gp2);
-
-       eth->dev = &pdev->dev;
        eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
        INIT_WORK(&eth->pending_work, mtk_pending_work);
 
@@ -1876,7 +1975,35 @@ static int mtk_probe(struct platform_device *pdev)
 
                err = mtk_add_mac(eth, mac_np);
                if (err)
-                       goto err_free_dev;
+                       goto err_deinit_hw;
+       }
+
+       err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
+                              dev_name(eth->dev), eth);
+       if (err)
+               goto err_free_dev;
+
+       err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
+                              dev_name(eth->dev), eth);
+       if (err)
+               goto err_free_dev;
+
+       err = mtk_mdio_init(eth);
+       if (err)
+               goto err_free_dev;
+
+       for (i = 0; i < MTK_MAX_DEVS; i++) {
+               if (!eth->netdev[i])
+                       continue;
+
+               err = register_netdev(eth->netdev[i]);
+               if (err) {
+                       dev_err(eth->dev, "error bringing up device\n");
+                       goto err_deinit_mdio;
+               }
+               netif_info(eth, probe, eth->netdev[i],
+                          "mediatek frame engine at 0x%08lx, irq %d\n",
+                          eth->netdev[i]->base_addr, eth->irq[0]);
        }
 
        /* we run 2 devices on the same DMA ring so we need a dummy device
@@ -1892,19 +2019,29 @@ static int mtk_probe(struct platform_device *pdev)
 
        return 0;
 
+err_deinit_mdio:
+       mtk_mdio_cleanup(eth);
 err_free_dev:
-       mtk_cleanup(eth);
+       mtk_free_dev(eth);
+err_deinit_hw:
+       mtk_hw_deinit(eth);
+
        return err;
 }
 
 static int mtk_remove(struct platform_device *pdev)
 {
        struct mtk_eth *eth = platform_get_drvdata(pdev);
+       int i;
+
+       /* stop all devices to make sure that dma is properly shut down */
+       for (i = 0; i < MTK_MAC_COUNT; i++) {
+               if (!eth->netdev[i])
+                       continue;
+               mtk_stop(eth->netdev[i]);
+       }
 
-       clk_disable_unprepare(eth->clk_ethif);
-       clk_disable_unprepare(eth->clk_esw);
-       clk_disable_unprepare(eth->clk_gp1);
-       clk_disable_unprepare(eth->clk_gp2);
+       mtk_hw_deinit(eth);
 
        netif_napi_del(&eth->tx_napi);
        netif_napi_del(&eth->rx_napi);
index f82e3ac..79954b4 100644 (file)
 /* Unicast Filter MAC Address Register - High */
 #define MTK_GDMA_MAC_ADRH(x)   (0x50C + (x * 0x1000))
 
+/* PDMA RX Base Pointer Register */
+#define MTK_PRX_BASE_PTR0      0x900
+
+/* PDMA RX Maximum Count Register */
+#define MTK_PRX_MAX_CNT0       0x904
+
+/* PDMA RX CPU Pointer Register */
+#define MTK_PRX_CRX_IDX0       0x908
+
+/* PDMA Global Configuration Register */
+#define MTK_PDMA_GLO_CFG       0xa04
+#define MTK_MULTI_EN           BIT(10)
+
+/* PDMA Reset Index Register */
+#define MTK_PDMA_RST_IDX       0xa08
+#define MTK_PST_DRX_IDX0       BIT(16)
+
+/* PDMA Delay Interrupt Register */
+#define MTK_PDMA_DELAY_INT     0xa0c
+
+/* PDMA Interrupt Status Register */
+#define MTK_PDMA_INT_STATUS    0xa20
+
+/* PDMA Interrupt Mask Register */
+#define MTK_PDMA_INT_MASK      0xa28
+
 /* PDMA Interrupt grouping registers */
 #define MTK_PDMA_INT_GRP1      0xa50
 #define MTK_PDMA_INT_GRP2      0xa54
 
 /* QDMA Interrupt Status Register */
 #define MTK_QMTK_INT_STATUS    0x1A18
+#define MTK_RX_DONE_INT3       BIT(19)
+#define MTK_RX_DONE_INT2       BIT(18)
 #define MTK_RX_DONE_INT1       BIT(17)
 #define MTK_RX_DONE_INT0       BIT(16)
 #define MTK_TX_DONE_INT3       BIT(3)
 #define MTK_TX_DONE_INT2       BIT(2)
 #define MTK_TX_DONE_INT1       BIT(1)
 #define MTK_TX_DONE_INT0       BIT(0)
-#define MTK_RX_DONE_INT                (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1)
+#define MTK_RX_DONE_INT                (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \
+                                MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3)
 #define MTK_TX_DONE_INT                (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
                                 MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
 
 #define SYSCFG0_GE_MASK                0x3
 #define SYSCFG0_GE_MODE(x, y)  (x << (12 + (y * 2)))
 
+/* ethernet reset control register */
+#define ETHSYS_RSTCTRL         0x34
+#define RSTCTRL_FE             BIT(6)
+#define RSTCTRL_PPE            BIT(31)
+
 struct mtk_rx_dma {
        unsigned int rxd1;
        unsigned int rxd2;
@@ -290,6 +324,22 @@ enum mtk_tx_flags {
        MTK_TX_FLAGS_PAGE0      = 0x02,
 };
 
+/* This enum names each clock's position in the clks[] array; the order
+ * must match mtk_clks_source_name[].
+ */
+enum mtk_clks_map {
+       MTK_CLK_ETHIF,
+       MTK_CLK_ESW,
+       MTK_CLK_GP1,
+       MTK_CLK_GP2,
+       MTK_CLK_MAX
+};
+
+enum mtk_dev_state {
+       MTK_HW_INIT,
+       MTK_RESETTING
+};
+
 /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at
  *                     by the TX descriptors
  * @skb:               The SKB pointer of the packet being sent
@@ -370,18 +420,15 @@ struct mtk_rx_ring {
  * @scratch_ring:      Newer SoCs need memory for a second HW managed TX ring
  * @phy_scratch_ring:  physical address of scratch_ring
  * @scratch_head:      The scratch memory that scratch_ring points to.
- * @clk_ethif:         The ethif clock
- * @clk_esw:           The switch clock
- * @clk_gp1:           The gmac1 clock
- * @clk_gp2:           The gmac2 clock
+ * @clks:              clock array for all clocks required
  * @mii_bus:           If there is a bus we need to create an instance for it
  * @pending_work:      The workqueue used to reset the dma ring
+ * @state:              Initialization and runtime state of the device.
  */
 
 struct mtk_eth {
        struct device                   *dev;
        void __iomem                    *base;
-       struct reset_control            *rstc;
        spinlock_t                      page_lock;
        spinlock_t                      irq_lock;
        struct net_device               dummy_dev;
@@ -400,17 +447,17 @@ struct mtk_eth {
        struct mtk_tx_dma               *scratch_ring;
        dma_addr_t                      phy_scratch_ring;
        void                            *scratch_head;
-       struct clk                      *clk_ethif;
-       struct clk                      *clk_esw;
-       struct clk                      *clk_gp1;
-       struct clk                      *clk_gp2;
+       struct clk                      *clks[MTK_CLK_MAX];
+
        struct mii_bus                  *mii_bus;
        struct work_struct              pending_work;
+       unsigned long                   state;
 };
 
 /* struct mtk_mac -    the structure that holds the info about the MACs of the
  *                     SoC
  * @id:                        The number of the MAC
+ * @ge_mode:            Interface mode, kept for restoring the setup after reset
  * @of_node:           Our devicetree node
  * @hw:                        Backpointer to our main datastruture
  * @hw_stats:          Packet statistics counter
@@ -418,6 +465,7 @@ struct mtk_eth {
  */
 struct mtk_mac {
        int                             id;
+       int                             ge_mode;
        struct device_node              *of_node;
        struct mtk_eth                  *hw;
        struct mtk_hw_stats             *hw_stats;
index 99c6bbd..b04760a 100644 (file)
@@ -94,7 +94,7 @@ static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap)
                *cap = true;
                break;
        case DCB_CAP_ATTR_DCBX:
-               *cap = priv->cee_params.dcbx_cap;
+               *cap = priv->dcbx_cap;
                break;
        case DCB_CAP_ATTR_PFC_TCS:
                *cap = 1 <<  mlx4_max_tc(priv->mdev->dev);
@@ -111,14 +111,14 @@ static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev)
 {
        struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-       return priv->cee_params.dcb_cfg.pfc_state;
+       return priv->cee_config.pfc_state;
 }
 
 static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
 {
        struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-       priv->cee_params.dcb_cfg.pfc_state = state;
+       priv->cee_config.pfc_state = state;
 }
 
 static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
@@ -126,7 +126,7 @@ static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
 {
        struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-       *setting = priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc;
+       *setting = priv->cee_config.dcb_pfc[priority];
 }
 
 static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
@@ -134,8 +134,8 @@ static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
 {
        struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-       priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc = setting;
-       priv->cee_params.dcb_cfg.pfc_state = true;
+       priv->cee_config.dcb_pfc[priority] = setting;
+       priv->cee_config.pfc_state = true;
 }
 
 static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
@@ -157,13 +157,11 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
 {
        struct mlx4_en_priv *priv = netdev_priv(netdev);
        struct mlx4_en_dev *mdev = priv->mdev;
-       struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg;
-       int err = 0;
 
-       if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
-               return -EINVAL;
+       if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+               return 1;
 
-       if (dcb_cfg->pfc_state) {
+       if (priv->cee_config.pfc_state) {
                int tc;
 
                priv->prof->rx_pause = 0;
@@ -171,7 +169,7 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
                for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) {
                        u8 tc_mask = 1 << tc;
 
-                       switch (dcb_cfg->tc_config[tc].dcb_pfc) {
+                       switch (priv->cee_config.dcb_pfc[tc]) {
                        case pfc_disabled:
                                priv->prof->tx_ppp &= ~tc_mask;
                                priv->prof->rx_ppp &= ~tc_mask;
@@ -199,15 +197,17 @@ static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev)
                en_dbg(DRV, priv, "Set pfc off\n");
        }
 
-       err = mlx4_SET_PORT_general(mdev->dev, priv->port,
-                                   priv->rx_skb_size + ETH_FCS_LEN,
-                                   priv->prof->tx_pause,
-                                   priv->prof->tx_ppp,
-                                   priv->prof->rx_pause,
-                                   priv->prof->rx_ppp);
-       if (err)
+       if (mlx4_SET_PORT_general(mdev->dev, priv->port,
+                                 priv->rx_skb_size + ETH_FCS_LEN,
+                                 priv->prof->tx_pause,
+                                 priv->prof->tx_ppp,
+                                 priv->prof->rx_pause,
+                                 priv->prof->rx_ppp)) {
                en_err(priv, "Failed setting pause params\n");
-       return err;
+               return 1;
+       }
+
+       return 0;
 }
 
 static u8 mlx4_en_dcbnl_get_state(struct net_device *dev)
@@ -225,7 +225,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
        struct mlx4_en_priv *priv = netdev_priv(dev);
        int num_tcs = 0;
 
-       if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+       if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
                return 1;
 
        if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED))
@@ -238,7 +238,10 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
                priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
        }
 
-       return mlx4_en_setup_tc(dev, num_tcs);
+       if (mlx4_en_setup_tc(dev, num_tcs))
+               return 1;
+
+       return 0;
 }
 
 /* On success returns a non-zero 802.1p user priority bitmap
@@ -252,7 +255,7 @@ static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
                                .selector = idtype,
                                .protocol = id,
                             };
-       if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+       if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
                return 0;
 
        return dcb_getapp(netdev, &app);
@@ -264,7 +267,7 @@ static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype,
        struct mlx4_en_priv *priv = netdev_priv(netdev);
        struct dcb_app app;
 
-       if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+       if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
                return -EINVAL;
 
        memset(&app, 0, sizeof(struct dcb_app));
@@ -433,7 +436,7 @@ static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
-       return priv->cee_params.dcbx_cap;
+       return priv->dcbx_cap;
 }
 
 static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
@@ -442,7 +445,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
        struct ieee_ets ets = {0};
        struct ieee_pfc pfc = {0};
 
-       if (mode == priv->cee_params.dcbx_cap)
+       if (mode == priv->dcbx_cap)
                return 0;
 
        if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
@@ -451,7 +454,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
            !(mode & DCB_CAP_DCBX_HOST))
                goto err;
 
-       priv->cee_params.dcbx_cap = mode;
+       priv->dcbx_cap = mode;
 
        ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
        pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS;
index 4198e9b..62516f8 100644 (file)
@@ -71,10 +71,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 #ifdef CONFIG_MLX4_EN_DCB
        if (!mlx4_is_slave(priv->mdev->dev)) {
                if (up) {
-                       priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+                       if (priv->dcbx_cap)
+                               priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
                } else {
                        priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
-                       priv->cee_params.dcb_cfg.pfc_state = false;
+                       priv->cee_config.pfc_state = false;
                }
        }
 #endif /* CONFIG_MLX4_EN_DCB */
@@ -2642,12 +2643,16 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                        if (IS_ERR(prog))
                                return PTR_ERR(prog);
                }
+               mutex_lock(&mdev->state_lock);
                for (i = 0; i < priv->rx_ring_num; i++) {
-                       /* This xchg is paired with READ_ONCE in the fastpath */
-                       old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+                       old_prog = rcu_dereference_protected(
+                                       priv->rx_ring[i]->xdp_prog,
+                                       lockdep_is_held(&mdev->state_lock));
+                       rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
                        if (old_prog)
                                bpf_prog_put(old_prog);
                }
+               mutex_unlock(&mdev->state_lock);
                return 0;
        }
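
The xchg()-based swap becomes the canonical RCU publish pattern: the updater serializes on mdev->state_lock, rcu_dereference_protected() documents (and lockdep-verifies) that the lock is held, and rcu_assign_pointer() publishes the new program with the required memory barrier. A generic sketch of that update side follows; the slot, lock, and function name are stand-ins, not mlx4 API:

	static void swap_xdp_prog(struct bpf_prog __rcu **slot,
				  struct bpf_prog *new_prog, struct mutex *lock)
	{
		struct bpf_prog *old_prog;

		mutex_lock(lock);
		old_prog = rcu_dereference_protected(*slot, lockdep_is_held(lock));
		rcu_assign_pointer(*slot, new_prog);
		mutex_unlock(lock);

		if (old_prog)
			bpf_prog_put(old_prog);	/* drop the reference to the old program */
	}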
 
@@ -2680,7 +2685,10 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                                                        priv->xdp_ring_num);
 
        for (i = 0; i < priv->rx_ring_num; i++) {
-               old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+               old_prog = rcu_dereference_protected(
+                                       priv->rx_ring[i]->xdp_prog,
+                                       lockdep_is_held(&mdev->state_lock));
+               rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
                if (old_prog)
                        bpf_prog_put(old_prog);
        }
@@ -3048,9 +3056,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        struct mlx4_en_priv *priv;
        int i;
        int err;
-#ifdef CONFIG_MLX4_EN_DCB
-       struct tc_configuration *tc;
-#endif
 
        dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
                                 MAX_TX_RINGS, MAX_RX_RINGS);
@@ -3117,16 +3122,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        priv->msg_enable = MLX4_EN_MSG_LEVEL;
 #ifdef CONFIG_MLX4_EN_DCB
        if (!mlx4_is_slave(priv->mdev->dev)) {
-               priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE |
-                                           DCB_CAP_DCBX_HOST |
-                                           DCB_CAP_DCBX_VER_IEEE;
+               priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST |
+                       DCB_CAP_DCBX_VER_IEEE;
                priv->flags |= MLX4_EN_DCB_ENABLED;
-               priv->cee_params.dcb_cfg.pfc_state = false;
+               priv->cee_config.pfc_state = false;
 
-               for (i = 0; i < MLX4_EN_NUM_UP; i++) {
-                       tc = &priv->cee_params.dcb_cfg.tc_config[i];
-                       tc->dcb_pfc = pfc_disabled;
-               }
+               for (i = 0; i < MLX4_EN_NUM_UP; i++)
+                       priv->cee_config.dcb_pfc[i] = pfc_disabled;
 
                if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
                        dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
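
The en_netdev.c hunks above convert the per-ring XDP program from a plain pointer swapped with xchg() into an RCU-protected pointer that is only updated under mdev->state_lock. A minimal sketch of that writer-side pattern, with hypothetical prog_slot/lock names rather than the driver's own:

#include <linux/bpf.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct prog_slot {
	struct bpf_prog __rcu *prog;
};

static void slot_swap(struct prog_slot *slot, struct bpf_prog *new_prog,
		      struct mutex *lock)
{
	struct bpf_prog *old;

	mutex_lock(lock);
	old = rcu_dereference_protected(slot->prog, lockdep_is_held(lock));
	rcu_assign_pointer(slot->prog, new_prog);
	mutex_unlock(lock);

	if (old)
		bpf_prog_put(old);	/* the actual free is deferred past readers */
}

rcu_assign_pointer() orders the publication of the new program; readers still running against the old one stay safe because bpf_prog_put() defers destruction behind a grace period.
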
index 2040dad..6758292 100644 (file)
@@ -537,7 +537,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
        struct mlx4_en_rx_ring *ring = *pring;
        struct bpf_prog *old_prog;
 
-       old_prog = READ_ONCE(ring->xdp_prog);
+       old_prog = rcu_dereference_protected(
+                                       ring->xdp_prog,
+                                       lockdep_is_held(&mdev->state_lock));
        if (old_prog)
                bpf_prog_put(old_prog);
        mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
@@ -800,7 +802,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        if (budget <= 0)
                return polled;
 
-       xdp_prog = READ_ONCE(ring->xdp_prog);
+       /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
+       rcu_read_lock();
+       xdp_prog = rcu_dereference(ring->xdp_prog);
        doorbell_pending = 0;
        tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
@@ -858,15 +862,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                                /* Drop the packet, since HW loopback-ed it */
                                mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
                                bucket = &priv->mac_hash[mac_hash];
-                               rcu_read_lock();
                                hlist_for_each_entry_rcu(entry, bucket, hlist) {
                                        if (ether_addr_equal_64bits(entry->mac,
-                                                                   ethh->h_source)) {
-                                               rcu_read_unlock();
+                                                                   ethh->h_source))
                                                goto next;
-                                       }
                                }
-                               rcu_read_unlock();
                        }
                }
 
@@ -1077,6 +1077,7 @@ consumed:
        }
 
 out:
+       rcu_read_unlock();
        if (doorbell_pending)
                mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
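
On the receive side, the en_rx.c hunk pairs with that change: one rcu_read_lock() now spans the whole CQ poll, covering both the xdp_prog dereference and the mac_hash loopback walk that previously took the read lock per packet. Schematically (same hypothetical prog_slot as the netdev sketch above):

static int poll_sketch(struct prog_slot *slot, int budget)
{
	struct bpf_prog *xdp_prog;
	int done = 0;

	rcu_read_lock();	/* one critical section per NAPI poll */
	xdp_prog = rcu_dereference(slot->prog);

	while (done < budget) {
		/* ... run xdp_prog, if set, on the next completion ... */
		done++;
	}

	rcu_read_unlock();	/* the old program may be freed only after this */
	return done;
}
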
 
index 9df87ca..e2509bb 100644 (file)
@@ -818,7 +818,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
        real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
                                  &inline_ok, &fragptr);
        if (unlikely(!real_size))
-               goto tx_drop;
+               goto tx_drop_count;
 
        /* Align descriptor to TXBB size */
        desc_size = ALIGN(real_size, TXBB_SIZE);
@@ -826,7 +826,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
                if (netif_msg_tx_err(priv))
                        en_warn(priv, "Oversized header or SG list\n");
-               goto tx_drop;
+               goto tx_drop_count;
        }
 
        bf_ok = ring->bf_enabled;
@@ -1071,9 +1071,10 @@ tx_drop_unmap:
                               PCI_DMA_TODEVICE);
        }
 
+tx_drop_count:
+       ring->tx_dropped++;
 tx_drop:
        dev_kfree_skb_any(skb);
-       ring->tx_dropped++;
        return NETDEV_TX_OK;
 }
 
@@ -1106,7 +1107,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
                goto tx_drop;
 
        if (mlx4_en_is_tx_ring_full(ring))
-               goto tx_drop;
+               goto tx_drop_count;
 
        /* fetch ring->cons far ahead before needing it to avoid stall */
        ring_cons = READ_ONCE(ring->cons);
@@ -1176,7 +1177,8 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 
        return NETDEV_TX_OK;
 
-tx_drop:
+tx_drop_count:
        ring->tx_dropped++;
+tx_drop:
        return NETDEV_TX_BUSY;
 }
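
The label split in en_tx.c fixes the tx_dropped accounting: drops that should be counted enter at tx_drop_count and fall through into the shared cleanup, while paths that must not bump the counter jump straight to tx_drop (mlx4_en_xmit_frame() gets the same treatment before its NETDEV_TX_BUSY return, minus the skb free). Reduced to a sketch with hypothetical conditions:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t xmit_sketch(struct sk_buff *skb, bool invalid,
			       bool ring_full, unsigned long *tx_dropped)
{
	if (invalid)
		goto tx_drop;		/* uncounted: accounted elsewhere */
	if (ring_full)
		goto tx_drop_count;	/* counted drop */

	/* ... build and post the descriptor ... */
	return NETDEV_TX_OK;

tx_drop_count:
	(*tx_dropped)++;
tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
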
index 2c2913d..a3528dd 100644 (file)
@@ -340,7 +340,7 @@ struct mlx4_en_rx_ring {
        u8  fcs_del;
        void *buf;
        void *rx_info;
-       struct bpf_prog *xdp_prog;
+       struct bpf_prog __rcu *xdp_prog;
        struct mlx4_en_page_cache page_cache;
        unsigned long bytes;
        unsigned long packets;
@@ -482,20 +482,10 @@ enum dcb_pfc_type {
        pfc_enabled_rx
 };
 
-struct tc_configuration {
-       enum dcb_pfc_type  dcb_pfc;
-};
-
 struct mlx4_en_cee_config {
        bool    pfc_state;
-       struct  tc_configuration tc_config[MLX4_EN_NUM_UP];
+       enum    dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
 };
-
-struct mlx4_en_cee_params {
-       u8 dcbx_cap;
-       struct mlx4_en_cee_config dcb_cfg;
-};
-
 #endif
 
 struct ethtool_flow_id {
@@ -624,7 +614,8 @@ struct mlx4_en_priv {
        struct ieee_ets ets;
        u16 maxrate[IEEE_8021QAZ_MAX_TCS];
        enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS];
-       struct mlx4_en_cee_params cee_params;
+       struct mlx4_en_cee_config cee_config;
+       u8 dcbx_cap;
 #endif
 #ifdef CONFIG_RFS_ACCEL
        spinlock_t filters_lock;
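
Two things happen in this header. The one-member tc_configuration wrapper is flattened into a plain dcb_pfc[] array (matching the en_netdev.c hunks that now index it directly), and xdp_prog gains the __rcu annotation that makes the accessor discipline above checkable: sparse places the pointer in a separate address space, so plain loads and stores warn until routed through the RCU helpers. Illustratively:

struct bpf_prog __rcu *xdp_prog;

/* accepted by sparse: */
prog = rcu_dereference(xdp_prog);	/* reader, inside rcu_read_lock() */
prog = rcu_dereference_protected(xdp_prog, lockdep_is_held(&lock));
rcu_assign_pointer(xdp_prog, prog);

/* a bare "prog = xdp_prog;" now draws an address-space warning */
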
index 3d2095e..c5b2064 100644 (file)
@@ -52,7 +52,7 @@
 
 #define MLX4_FLAG_V_IGNORE_FCS_MASK            0x2
 #define MLX4_IGNORE_FCS_MASK                   0x1
-#define MLNX4_TX_MAX_NUMBER                    8
+#define MLX4_TC_MAX_NUMBER                     8
 
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
@@ -2022,7 +2022,7 @@ int mlx4_max_tc(struct mlx4_dev *dev)
        u8 num_tc = dev->caps.max_tc_eth;
 
        if (!num_tc)
-               num_tc = MLNX4_TX_MAX_NUMBER;
+               num_tc = MLX4_TC_MAX_NUMBER;
 
        return num_tc;
 }
index 05cc1ef..0343725 100644 (file)
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE)         += mlx5_core.o
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
                mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-               fs_counters.o rl.o
+               fs_counters.o rl.o lag.o dev.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
                en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
index d6e2a1c..1e639f8 100644 (file)
@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
        return cmd->cmd_buf + (idx << cmd->log_stride);
 }
 
-static u8 xor8_buf(void *buf, int len)
+static u8 xor8_buf(void *buf, size_t offset, int len)
 {
        u8 *ptr = buf;
        u8 sum = 0;
        int i;
+       int end = len + offset;
 
-       for (i = 0; i < len; i++)
+       for (i = offset; i < end; i++)
                sum ^= ptr[i];
 
        return sum;
@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len)
 
 static int verify_block_sig(struct mlx5_cmd_prot_block *block)
 {
-       if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
+       size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+       int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+       if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
                return -EINVAL;
 
-       if (xor8_buf(block, sizeof(*block)) != 0xff)
+       if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
                return -EINVAL;
 
        return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
-                          int csum)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
 {
-       block->token = token;
-       if (csum) {
-               block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
-                                           sizeof(block->data) - 2);
-               block->sig = ~xor8_buf(block, sizeof(*block) - 1);
-       }
+       int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+       size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+       block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+       block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
 {
        struct mlx5_cmd_mailbox *next = msg->next;
-
-       while (next) {
-               calc_block_sig(next->buf, token, csum);
+       int size = msg->len;
+       int blen = size - min_t(int, sizeof(msg->first.data), size);
+       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+               / MLX5_CMD_DATA_BLOCK_SIZE;
+       int i = 0;
+
+       for (i = 0; i < n && next; i++)  {
+               calc_block_sig(next->buf);
                next = next->next;
        }
 }
 
 static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
-       ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-       calc_chain_sig(ent->in, ent->token, csum);
-       calc_chain_sig(ent->out, ent->token, csum);
+       ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay));
+       if (csum) {
+               calc_chain_sig(ent->in);
+               calc_chain_sig(ent->out);
+       }
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
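
The reworked xor8_buf() threads an explicit offset through the checksum so callers can cover a sub-range of a block (skipping the data payload) without forming an interior pointer such as block->rsvd0. The invariant is the usual XOR-8 one; a standalone simplification:

#include <stddef.h>
#include <stdint.h>

/* XOR all bytes in [offset, offset + len) of buf. */
static uint8_t xor8(const void *buf, size_t offset, size_t len)
{
	const uint8_t *p = buf;
	uint8_t sum = 0;
	size_t i;

	for (i = offset; i < offset + len; i++)
		sum ^= p[i];
	return sum;
}

/*
 * The writer stores sig = ~xor8(block, 0, size - 1) in the final byte, so
 * the verifier's xor8(block, 0, size) comes out to 0xff iff nothing changed.
 */
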
@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
        struct mlx5_cmd_mailbox *next = ent->out->next;
        int err;
        u8 sig;
+       int size = ent->out->len;
+       int blen = size - min_t(int, sizeof(ent->out->first.data), size);
+       int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+               / MLX5_CMD_DATA_BLOCK_SIZE;
+       int i = 0;
 
-       sig = xor8_buf(ent->lay, sizeof(*ent->lay));
+       sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
        if (sig != 0xff)
                return -EINVAL;
 
-       while (next) {
+       for (i = 0; i < n && next; i++) {
                err = verify_block_sig(next->buf);
                if (err)
                        return err;
@@ -280,11 +294,13 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
        case MLX5_CMD_OP_DEALLOC_PD:
        case MLX5_CMD_OP_DEALLOC_UAR:
-       case MLX5_CMD_OP_DETTACH_FROM_MCG:
+       case MLX5_CMD_OP_DETACH_FROM_MCG:
        case MLX5_CMD_OP_DEALLOC_XRCD:
        case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
        case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+       case MLX5_CMD_OP_DESTROY_LAG:
+       case MLX5_CMD_OP_DESTROY_VPORT_LAG:
        case MLX5_CMD_OP_DESTROY_TIR:
        case MLX5_CMD_OP_DESTROY_SQ:
        case MLX5_CMD_OP_DESTROY_RQ:
@@ -301,6 +317,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+       case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -375,6 +392,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
        case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
        case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+       case MLX5_CMD_OP_CREATE_LAG:
+       case MLX5_CMD_OP_MODIFY_LAG:
+       case MLX5_CMD_OP_QUERY_LAG:
+       case MLX5_CMD_OP_CREATE_VPORT_LAG:
        case MLX5_CMD_OP_CREATE_TIR:
        case MLX5_CMD_OP_MODIFY_TIR:
        case MLX5_CMD_OP_QUERY_TIR:
@@ -402,6 +423,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+       case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
                *status = MLX5_DRIVER_STATUS_ABORTED;
                *synd = MLX5_DRIVER_SYND;
                return -EIO;
@@ -490,7 +512,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
        MLX5_COMMAND_STR_CASE(ACCESS_REG);
        MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
-       MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG);
+       MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
        MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
        MLX5_COMMAND_STR_CASE(MAD_IFC);
        MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
@@ -512,6 +534,12 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
        MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
        MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+       MLX5_COMMAND_STR_CASE(CREATE_LAG);
+       MLX5_COMMAND_STR_CASE(MODIFY_LAG);
+       MLX5_COMMAND_STR_CASE(QUERY_LAG);
+       MLX5_COMMAND_STR_CASE(DESTROY_LAG);
+       MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG);
+       MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG);
        MLX5_COMMAND_STR_CASE(CREATE_TIR);
        MLX5_COMMAND_STR_CASE(MODIFY_TIR);
        MLX5_COMMAND_STR_CASE(DESTROY_TIR);
@@ -550,15 +578,130 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
        MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+       MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+       MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
        default: return "unknown command opcode";
        }
 }
 
+static const char *cmd_status_str(u8 status)
+{
+       switch (status) {
+       case MLX5_CMD_STAT_OK:
+               return "OK";
+       case MLX5_CMD_STAT_INT_ERR:
+               return "internal error";
+       case MLX5_CMD_STAT_BAD_OP_ERR:
+               return "bad operation";
+       case MLX5_CMD_STAT_BAD_PARAM_ERR:
+               return "bad parameter";
+       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+               return "bad system state";
+       case MLX5_CMD_STAT_BAD_RES_ERR:
+               return "bad resource";
+       case MLX5_CMD_STAT_RES_BUSY:
+               return "resource busy";
+       case MLX5_CMD_STAT_LIM_ERR:
+               return "limits exceeded";
+       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+               return "bad resource state";
+       case MLX5_CMD_STAT_IX_ERR:
+               return "bad index";
+       case MLX5_CMD_STAT_NO_RES_ERR:
+               return "no resources";
+       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+               return "bad input length";
+       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+               return "bad output length";
+       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+               return "bad QP state";
+       case MLX5_CMD_STAT_BAD_PKT_ERR:
+               return "bad packet (discarded)";
+       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+               return "bad size too many outstanding CQEs";
+       default:
+               return "unknown status";
+       }
+}
+
+static int cmd_status_to_err(u8 status)
+{
+       switch (status) {
+       case MLX5_CMD_STAT_OK:                          return 0;
+       case MLX5_CMD_STAT_INT_ERR:                     return -EIO;
+       case MLX5_CMD_STAT_BAD_OP_ERR:                  return -EINVAL;
+       case MLX5_CMD_STAT_BAD_PARAM_ERR:               return -EINVAL;
+       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:           return -EIO;
+       case MLX5_CMD_STAT_BAD_RES_ERR:                 return -EINVAL;
+       case MLX5_CMD_STAT_RES_BUSY:                    return -EBUSY;
+       case MLX5_CMD_STAT_LIM_ERR:                     return -ENOMEM;
+       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:           return -EINVAL;
+       case MLX5_CMD_STAT_IX_ERR:                      return -EINVAL;
+       case MLX5_CMD_STAT_NO_RES_ERR:                  return -EAGAIN;
+       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:             return -EIO;
+       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:            return -EIO;
+       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:            return -EINVAL;
+       case MLX5_CMD_STAT_BAD_PKT_ERR:                 return -EINVAL;
+       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:      return -EINVAL;
+       default:                                        return -EIO;
+       }
+}
+
+struct mlx5_ifc_mbox_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+{
+       *status = MLX5_GET(mbox_out, out, status);
+       *syndrome = MLX5_GET(mbox_out, out, syndrome);
+}
+
+static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+{
+       u32 syndrome;
+       u8  status;
+       u16 opcode;
+       u16 op_mod;
+
+       mlx5_cmd_mbox_status(out, &status, &syndrome);
+       if (!status)
+               return 0;
+
+       opcode = MLX5_GET(mbox_in, in, opcode);
+       op_mod = MLX5_GET(mbox_in, in, op_mod);
+
+       mlx5_core_err(dev,
+                     "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+                     mlx5_command_str(opcode),
+                     opcode, op_mod,
+                     cmd_status_str(status),
+                     status,
+                     syndrome);
+
+       return cmd_status_to_err(status);
+}
+
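
mlx5_cmd_check() reads status and syndrome through MLX5_GET against the generic mbox_out layout declared just above it. As a rough standalone model (not the real macro), a getter of that shape extracts a bitfield from big-endian 32-bit words, assuming the field does not straddle a word boundary:

#include <stdint.h>

static uint32_t be32_load(const void *p)
{
	const uint8_t *b = p;

	return ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
	       ((uint32_t)b[2] << 8) | b[3];
}

/* Extract bit_sz bits starting bit_off bits into a big-endian layout. */
static uint32_t get_bits(const void *base, unsigned int bit_off,
			 unsigned int bit_sz)
{
	uint32_t word = be32_load((const uint8_t *)base + (bit_off / 32) * 4);
	unsigned int shift = 32 - (bit_off % 32) - bit_sz;
	uint32_t mask = bit_sz < 32 ? (1u << bit_sz) - 1 : 0xffffffffu;

	return (word >> shift) & mask;
}

/* e.g. status = get_bits(out, 0, 8); syndrome = get_bits(out, 32, 32); */
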
 static void dump_command(struct mlx5_core_dev *dev,
                         struct mlx5_cmd_work_ent *ent, int input)
 {
-       u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode);
        struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+       u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode);
        struct mlx5_cmd_mailbox *next = msg->next;
        int data_only;
        u32 offset = 0;
@@ -608,9 +751,7 @@ static void dump_command(struct mlx5_core_dev *dev,
 
 static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
 {
-       struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-       return be16_to_cpu(hdr->opcode);
+       return MLX5_GET(mbox_in, in->first.data, opcode);
 }
 
 static void cb_timeout_handler(struct work_struct *work)
@@ -656,7 +797,6 @@ static void cmd_work_handler(struct work_struct *work)
                spin_unlock_irqrestore(&cmd->alloc_lock, flags);
        }
 
-       ent->token = alloc_token(cmd);
        cmd->ent_arr[ent->idx] = ent;
        lay = get_inst(cmd, ent->idx);
        ent->lay = lay;
@@ -749,16 +889,6 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
        return err;
 }
 
-static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out)
-{
-       return &out->syndrome;
-}
-
-static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
-{
-       return &out->status;
-}
-
 /*  Notes:
  *    1. Callback functions may not sleep
 *    2. page queue commands do not support asynchronous completion
@@ -766,7 +896,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
                           struct mlx5_cmd_msg *out, void *uout, int uout_size,
                           mlx5_cmd_cbk_t callback,
-                          void *context, int page_queue, u8 *status)
+                          void *context, int page_queue, u8 *status,
+                          u8 token)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
        struct mlx5_cmd_work_ent *ent;
@@ -783,6 +914,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        if (IS_ERR(ent))
                return PTR_ERR(ent);
 
+       ent->token = token;
+
        if (!callback)
                init_completion(&ent->done);
 
@@ -804,7 +937,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
                goto out_free;
 
        ds = ent->ts2 - ent->ts1;
-       op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+       op = MLX5_GET(mbox_in, in->first.data, opcode);
        if (op < ARRAY_SIZE(cmd->stats)) {
                stats = &cmd->stats[op];
                spin_lock_irq(&stats->lock);
@@ -854,7 +987,8 @@ static const struct file_operations fops = {
        .write  = dbg_write,
 };
 
-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+                           u8 token)
 {
        struct mlx5_cmd_prot_block *block;
        struct mlx5_cmd_mailbox *next;
@@ -880,6 +1014,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
                memcpy(block->data, from, copy);
                from += copy;
                size -= copy;
+               block->token = token;
                next = next->next;
        }
 
@@ -949,7 +1084,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev,
 }
 
 static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
-                                              gfp_t flags, int size)
+                                              gfp_t flags, int size,
+                                              u8 token)
 {
        struct mlx5_cmd_mailbox *tmp, *head = NULL;
        struct mlx5_cmd_prot_block *block;
@@ -978,6 +1114,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
                tmp->next = head;
                block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
                block->block_num = cpu_to_be32(n - i - 1);
+               block->token = token;
                head = tmp;
        }
        msg->next = head;
@@ -1015,7 +1152,6 @@ static ssize_t data_write(struct file *filp, const char __user *buf,
        struct mlx5_core_dev *dev = filp->private_data;
        struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
        void *ptr;
-       int err;
 
        if (*pos != 0)
                return -EINVAL;
@@ -1023,25 +1159,15 @@ static ssize_t data_write(struct file *filp, const char __user *buf,
        kfree(dbg->in_msg);
        dbg->in_msg = NULL;
        dbg->inlen = 0;
-
-       ptr = kzalloc(count, GFP_KERNEL);
-       if (!ptr)
-               return -ENOMEM;
-
-       if (copy_from_user(ptr, buf, count)) {
-               err = -EFAULT;
-               goto out;
-       }
+       ptr = memdup_user(buf, count);
+       if (IS_ERR(ptr))
+               return PTR_ERR(ptr);
        dbg->in_msg = ptr;
        dbg->inlen = count;
 
        *pos = count;
 
        return count;
-
-out:
-       kfree(ptr);
-       return err;
 }
 
 static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
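
The data_write() rewrite above is a straight substitution of the open-coded kzalloc()/copy_from_user()/kfree() unwind with memdup_user(), which returns either the populated buffer or an ERR_PTR. The resulting shape, sketched against a hypothetical dbg_state holder:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

struct dbg_state {
	void	*in_msg;
	int	inlen;
};

static ssize_t store_user_buf(struct dbg_state *dbg, const char __user *buf,
			      size_t count)
{
	void *ptr = memdup_user(buf, count);

	if (IS_ERR(ptr))
		return PTR_ERR(ptr);	/* -ENOMEM or -EFAULT, nothing to free */

	kfree(dbg->in_msg);
	dbg->in_msg = ptr;
	dbg->inlen = count;
	return count;
}
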
@@ -1301,11 +1427,16 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec)
                                callback = ent->callback;
                                context = ent->context;
                                err = ent->ret;
-                               if (!err)
+                               if (!err) {
                                        err = mlx5_copy_from_msg(ent->uout,
                                                                 ent->out,
                                                                 ent->uout_size);
 
+                                       err = err ? err : mlx5_cmd_check(dev,
+                                                                       ent->in->first.data,
+                                                                       ent->uout);
+                               }
+
                                mlx5_free_cmd_msg(dev, ent->out);
                                free_msg(dev, ent->in);
 
@@ -1352,19 +1483,14 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
        }
 
        if (IS_ERR(msg))
-               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 
        return msg;
 }
 
-static u16 opcode_from_in(struct mlx5_inbox_hdr *in)
+static int is_manage_pages(void *in)
 {
-       return be16_to_cpu(in->opcode);
-}
-
-static int is_manage_pages(struct mlx5_inbox_hdr *in)
-{
-       return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+       return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
 }
 
 static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
@@ -1377,12 +1503,15 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
        int err;
        u8 status = 0;
        u32 drv_synd;
+       u8 token;
 
        if (pci_channel_offline(dev->pdev) ||
            dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-               err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status);
-               *get_synd_ptr(out) = cpu_to_be32(drv_synd);
-               *get_status_ptr(out) = status;
+               u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+               err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
+               MLX5_SET(mbox_out, out, status, status);
+               MLX5_SET(mbox_out, out, syndrome, drv_synd);
                return err;
        }
 
@@ -1395,20 +1524,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                return err;
        }
 
-       err = mlx5_copy_to_msg(inb, in, in_size);
+       token = alloc_token(&dev->cmd);
+
+       err = mlx5_copy_to_msg(inb, in, in_size, token);
        if (err) {
                mlx5_core_warn(dev, "err %d\n", err);
                goto out_in;
        }
 
-       outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+       outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
        if (IS_ERR(outb)) {
                err = PTR_ERR(outb);
                goto out_in;
        }
 
        err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
-                             pages_queue, &status);
+                             pages_queue, &status, token);
        if (err)
                goto out_out;
 
@@ -1434,7 +1565,10 @@ out_in:
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                  int out_size)
 {
-       return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+       int err;
+
+       err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+       return err ? : mlx5_cmd_check(dev, in, out);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
@@ -1476,7 +1610,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
        INIT_LIST_HEAD(&cmd->cache.med.head);
 
        for (i = 0; i < NUM_LONG_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
                if (IS_ERR(msg)) {
                        err = PTR_ERR(msg);
                        goto ex_err;
@@ -1486,7 +1620,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
        }
 
        for (i = 0; i < NUM_MED_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
                if (IS_ERR(msg)) {
                        err = PTR_ERR(msg);
                        goto ex_err;
@@ -1671,96 +1805,3 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
        pci_pool_destroy(cmd->pool);
 }
 EXPORT_SYMBOL(mlx5_cmd_cleanup);
-
-static const char *cmd_status_str(u8 status)
-{
-       switch (status) {
-       case MLX5_CMD_STAT_OK:
-               return "OK";
-       case MLX5_CMD_STAT_INT_ERR:
-               return "internal error";
-       case MLX5_CMD_STAT_BAD_OP_ERR:
-               return "bad operation";
-       case MLX5_CMD_STAT_BAD_PARAM_ERR:
-               return "bad parameter";
-       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
-               return "bad system state";
-       case MLX5_CMD_STAT_BAD_RES_ERR:
-               return "bad resource";
-       case MLX5_CMD_STAT_RES_BUSY:
-               return "resource busy";
-       case MLX5_CMD_STAT_LIM_ERR:
-               return "limits exceeded";
-       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
-               return "bad resource state";
-       case MLX5_CMD_STAT_IX_ERR:
-               return "bad index";
-       case MLX5_CMD_STAT_NO_RES_ERR:
-               return "no resources";
-       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
-               return "bad input length";
-       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
-               return "bad output length";
-       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
-               return "bad QP state";
-       case MLX5_CMD_STAT_BAD_PKT_ERR:
-               return "bad packet (discarded)";
-       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
-               return "bad size too many outstanding CQEs";
-       default:
-               return "unknown status";
-       }
-}
-
-static int cmd_status_to_err(u8 status)
-{
-       switch (status) {
-       case MLX5_CMD_STAT_OK:                          return 0;
-       case MLX5_CMD_STAT_INT_ERR:                     return -EIO;
-       case MLX5_CMD_STAT_BAD_OP_ERR:                  return -EINVAL;
-       case MLX5_CMD_STAT_BAD_PARAM_ERR:               return -EINVAL;
-       case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:           return -EIO;
-       case MLX5_CMD_STAT_BAD_RES_ERR:                 return -EINVAL;
-       case MLX5_CMD_STAT_RES_BUSY:                    return -EBUSY;
-       case MLX5_CMD_STAT_LIM_ERR:                     return -ENOMEM;
-       case MLX5_CMD_STAT_BAD_RES_STATE_ERR:           return -EINVAL;
-       case MLX5_CMD_STAT_IX_ERR:                      return -EINVAL;
-       case MLX5_CMD_STAT_NO_RES_ERR:                  return -EAGAIN;
-       case MLX5_CMD_STAT_BAD_INP_LEN_ERR:             return -EIO;
-       case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:            return -EIO;
-       case MLX5_CMD_STAT_BAD_QP_STATE_ERR:            return -EINVAL;
-       case MLX5_CMD_STAT_BAD_PKT_ERR:                 return -EINVAL;
-       case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:      return -EINVAL;
-       default:                                        return -EIO;
-       }
-}
-
-/* this will be available till all the commands use set/get macros */
-int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
-{
-       if (!hdr->status)
-               return 0;
-
-       pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
-               cmd_status_str(hdr->status), hdr->status,
-               be32_to_cpu(hdr->syndrome));
-
-       return cmd_status_to_err(hdr->status);
-}
-
-int mlx5_cmd_status_to_err_v2(void *ptr)
-{
-       u32     syndrome;
-       u8      status;
-
-       status = be32_to_cpu(*(__be32 *)ptr) >> 24;
-       if (!status)
-               return 0;
-
-       syndrome = be32_to_cpu(*(__be32 *)(ptr + 4));
-
-       pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
-               cmd_status_str(status), status, syndrome);
-
-       return cmd_status_to_err(status);
-}
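
The token changes running through this file move token allocation out of cmd_work_handler() and up into cmd_exec(): one token is drawn per command and stamped into the layout entry and every chained mailbox block before posting, and calc_chain_sig()/verify_signature() now bound their walks by the computed block count instead of trusting the next pointers alone. The stamping, schematically (hypothetical block type; the skip-zero rule mirrors what alloc_token() appears to guarantee):

#include <stdint.h>

struct mbox_block {
	uint8_t token;
	struct mbox_block *next;
};

/* Stamp one command-scoped token into every chained block. */
static void stamp_chain(struct mbox_block *head, uint8_t token)
{
	struct mbox_block *b;

	for (b = head; b; b = b->next)
		b->token = token;
}

/* Tokens skip 0 so a zero-filled block never looks valid. */
static uint8_t next_token(uint8_t *ctr)
{
	if (++(*ctr) == 0)
		++(*ctr);
	return *ctr;
}
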
index 873a631..32d4af9 100644 (file)
@@ -134,33 +134,29 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
                complete(&cq->free);
 }
 
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                       struct mlx5_create_cq_mbox_in *in, int inlen)
+                       u32 *in, int inlen)
 {
-       int err;
        struct mlx5_cq_table *table = &dev->priv.cq_table;
-       struct mlx5_create_cq_mbox_out out;
-       struct mlx5_destroy_cq_mbox_in din;
-       struct mlx5_destroy_cq_mbox_out dout;
+       u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+       u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+       u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
        int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
                           c_eqn);
        struct mlx5_eq *eq;
+       int err;
 
        eq = mlx5_eqn2eq(dev, eqn);
        if (IS_ERR(eq))
                return PTR_ERR(eq);
 
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
-       memset(&out, 0, sizeof(out));
-       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       memset(out, 0, sizeof(out));
+       MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (err)
                return err;
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       cq->cqn = be32_to_cpu(out.cqn) & 0xffffff;
+       cq->cqn = MLX5_GET(create_cq_out, out, cqn);
        cq->cons_index = 0;
        cq->arm_sn     = 0;
        atomic_set(&cq->refcount, 1);
@@ -186,10 +182,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
        return 0;
 
 err_cmd:
-       memset(&din, 0, sizeof(din));
-       memset(&dout, 0, sizeof(dout));
-       din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
-       mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+       memset(din, 0, sizeof(din));
+       memset(dout, 0, sizeof(dout));
+       MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+       MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+       mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_create_cq);
@@ -197,8 +194,8 @@ EXPORT_SYMBOL(mlx5_core_create_cq);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
        struct mlx5_cq_table *table = &dev->priv.cq_table;
-       struct mlx5_destroy_cq_mbox_in in;
-       struct mlx5_destroy_cq_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
        struct mlx5_core_cq *tmp;
        int err;
 
@@ -214,17 +211,12 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
                return -EINVAL;
        }
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
-       in.cqn = cpu_to_be32(cq->cqn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
+       MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
        synchronize_irq(cq->irqn);
 
        mlx5_debug_cq_remove(dev, cq);
@@ -237,44 +229,23 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                      struct mlx5_query_cq_mbox_out *out)
+                      u32 *out, int outlen)
 {
-       struct mlx5_query_cq_mbox_in in;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(out, 0, sizeof(*out));
-
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ);
-       in.cqn = cpu_to_be32(cq->cqn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
-       if (err)
-               return err;
-
-       if (out->hdr.status)
-               return mlx5_cmd_status_to_err(&out->hdr);
+       u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
 
-       return err;
+       MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
+       MLX5_SET(query_cq_in, in, cqn, cq->cqn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_cq);
 
-
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                       struct mlx5_modify_cq_mbox_in *in, int in_sz)
+                       u32 *in, int inlen)
 {
-       struct mlx5_modify_cq_mbox_out out;
-       int err;
-
-       memset(&out, 0, sizeof(out));
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
-       err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
-       if (err)
-               return err;
+       u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       return 0;
+       MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq);
 
@@ -283,18 +254,20 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
                                   u16 cq_period,
                                   u16 cq_max_count)
 {
-       struct mlx5_modify_cq_mbox_in in;
-
-       memset(&in, 0, sizeof(in));
-
-       in.cqn              = cpu_to_be32(cq->cqn);
-       in.ctx.cq_period    = cpu_to_be16(cq_period);
-       in.ctx.cq_max_count = cpu_to_be16(cq_max_count);
-       in.field_select     = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD |
-                                         MLX5_CQ_MODIFY_COUNT);
-
-       return mlx5_core_modify_cq(dev, cq, &in, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+       void *cqc;
+
+       MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
+       cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+       MLX5_SET(cqc, cqc, cq_period, cq_period);
+       MLX5_SET(cqc, cqc, cq_max_count, cq_max_count);
+       MLX5_SET(modify_cq_in, in,
+                modify_field_select_resize_field_select.modify_field_select.modify_field_select,
+                MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+
+       return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
 }
+EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
 
 int mlx5_init_cq_table(struct mlx5_core_dev *dev)
 {
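
The cq.c conversion retires the mlx5_*_mbox_in/out structs in favor of u32 arrays sized straight from the firmware interface definitions, with fields set through MLX5_SET; the per-caller out.hdr.status checks disappear because mlx5_cmd_exec() now ends in mlx5_cmd_check() (see the cmd.c hunk earlier). Minus the table bookkeeping, mlx5_core_destroy_cq() reduces to roughly:

static int destroy_cq_sketch(struct mlx5_core_dev *dev, u32 cqn)
{
	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};

	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
	MLX5_SET(destroy_cq_in, in, cqn, cqn);

	/* status/syndrome decoding happens inside mlx5_cmd_exec() now */
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
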
index 5210d92..e94a953 100644 (file)
@@ -277,24 +277,28 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
                         int index, int *is_str)
 {
-       struct mlx5_query_qp_mbox_out *out;
+       int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
        struct mlx5_qp_context *ctx;
        u64 param = 0;
+       u32 *out;
        int err;
        int no_sq;
 
-       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       out = kzalloc(outlen, GFP_KERNEL);
        if (!out)
                return param;
 
-       err = mlx5_core_qp_query(dev, qp, out, sizeof(*out));
+       err = mlx5_core_qp_query(dev, qp, out, outlen);
        if (err) {
-               mlx5_core_warn(dev, "failed to query qp\n");
+               mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
                goto out;
        }
 
        *is_str = 0;
-       ctx = &out->ctx;
+
+       /* FIXME: use MLX5_GET rather than the manual mlx5_qp_context struct */
+       ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
+
        switch (index) {
        case QP_PID:
                param = qp->pid;
@@ -358,32 +362,32 @@ out:
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                         int index)
 {
-       struct mlx5_query_eq_mbox_out *out;
-       struct mlx5_eq_context *ctx;
+       int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
        u64 param = 0;
+       void *ctx;
+       u32 *out;
        int err;
 
-       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       out = kzalloc(outlen, GFP_KERNEL);
        if (!out)
                return param;
 
-       ctx = &out->ctx;
-
-       err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+       err = mlx5_core_eq_query(dev, eq, out, outlen);
        if (err) {
                mlx5_core_warn(dev, "failed to query eq\n");
                goto out;
        }
+       ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
 
        switch (index) {
        case EQ_NUM_EQES:
-               param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+               param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
                break;
        case EQ_INTR:
-               param = ctx->intr;
+               param = MLX5_GET(eqc, ctx, intr);
                break;
        case EQ_LOG_PG_SZ:
-               param = (ctx->log_page_size & 0x1f) + 12;
+               param = MLX5_GET(eqc, ctx, log_page_size) + 12;
                break;
        }
 
@@ -395,37 +399,37 @@ out:
 static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                         int index)
 {
-       struct mlx5_query_cq_mbox_out *out;
-       struct mlx5_cq_context *ctx;
+       int outlen = MLX5_ST_SZ_BYTES(query_cq_out);
        u64 param = 0;
+       void *ctx;
+       u32 *out;
        int err;
 
-       out = kzalloc(sizeof(*out), GFP_KERNEL);
+       out = mlx5_vzalloc(outlen);
        if (!out)
                return param;
 
-       ctx = &out->ctx;
-
-       err = mlx5_core_query_cq(dev, cq, out);
+       err = mlx5_core_query_cq(dev, cq, out, outlen);
        if (err) {
                mlx5_core_warn(dev, "failed to query cq\n");
                goto out;
        }
+       ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context);
 
        switch (index) {
        case CQ_PID:
                param = cq->pid;
                break;
        case CQ_NUM_CQES:
-               param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+               param = 1 << MLX5_GET(cqc, ctx, log_cq_size);
                break;
        case CQ_LOG_PG_SZ:
-               param = (ctx->log_pg_sz & 0x1f) + 12;
+               param = MLX5_GET(cqc, ctx, log_page_size);
                break;
        }
 
 out:
-       kfree(out);
+       kvfree(out);
        return param;
 }
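
Note the allocation/free pairing in cq_read_field(): the out buffer moves from kzalloc() to mlx5_vzalloc() (kmalloc with a vmalloc fallback, useful for the larger MLX5_ST_SZ_BYTES layouts), so the free must go through kvfree(), which handles either origin. Generically (kvzalloc() below stands in for the driver-local mlx5_vzalloc(); treat the name as an assumption, not this tree's API):

void *out = kvzalloc(outlen, GFP_KERNEL);	/* kmalloc first, vmalloc fallback */

if (!out)
	return;
/* ... run the firmware query, read fields out of the buffer ... */
kvfree(out);	/* correct for both kmalloc- and vmalloc-backed memory */
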
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644 (file)
index 0000000..a9dbc28
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(mlx5_dev_list);
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+
+struct mlx5_device_context {
+       struct list_head        list;
+       struct mlx5_interface  *intf;
+       void                   *context;
+       unsigned long           state;
+};
+
+enum {
+       MLX5_INTERFACE_ADDED,
+       MLX5_INTERFACE_ATTACHED,
+};
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+       if (!mlx5_lag_intf_add(intf, priv))
+               return;
+
+       dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
+       if (!dev_ctx)
+               return;
+
+       dev_ctx->intf = intf;
+       dev_ctx->context = intf->add(dev);
+       set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+       if (intf->attach)
+               set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
+       if (dev_ctx->context) {
+               spin_lock_irq(&priv->ctx_lock);
+               list_add_tail(&dev_ctx->list, &priv->ctx_list);
+               spin_unlock_irq(&priv->ctx_lock);
+       } else {
+               kfree(dev_ctx);
+       }
+}
+
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+                                                  struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+
+       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+               if (dev_ctx->intf == intf)
+                       return dev_ctx;
+       return NULL;
+}
+
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+       dev_ctx = mlx5_get_device(intf, priv);
+       if (!dev_ctx)
+               return;
+
+       spin_lock_irq(&priv->ctx_lock);
+       list_del(&dev_ctx->list);
+       spin_unlock_irq(&priv->ctx_lock);
+
+       if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+               intf->remove(dev, dev_ctx->context);
+
+       kfree(dev_ctx);
+}
+
+static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+       dev_ctx = mlx5_get_device(intf, priv);
+       if (!dev_ctx)
+               return;
+
+       if (intf->attach) {
+               if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+                       return;
+               intf->attach(dev, dev_ctx->context);
+               set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+       } else {
+               if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+                       return;
+               dev_ctx->context = intf->add(dev);
+               set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+       }
+}
+
+void mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_interface *intf;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_for_each_entry(intf, &intf_list, list)
+               mlx5_attach_interface(intf, priv);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
+static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_device_context *dev_ctx;
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+       dev_ctx = mlx5_get_device(intf, priv);
+       if (!dev_ctx)
+               return;
+
+       if (intf->detach) {
+               if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+                       return;
+               intf->detach(dev, dev_ctx->context);
+               clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+       } else {
+               if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+                       return;
+               intf->remove(dev, dev_ctx->context);
+               clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+       }
+}
+
+void mlx5_detach_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_interface *intf;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_for_each_entry(intf, &intf_list, list)
+               mlx5_detach_interface(intf, priv);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
+bool mlx5_device_registered(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv;
+       bool found = false;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+               if (priv == &dev->priv)
+                       found = true;
+       mutex_unlock(&mlx5_intf_mutex);
+
+       return found;
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_interface *intf;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_add_tail(&priv->dev_list, &mlx5_dev_list);
+       list_for_each_entry(intf, &intf_list, list)
+               mlx5_add_device(intf, priv);
+       mutex_unlock(&mlx5_intf_mutex);
+
+       return 0;
+}
+
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_interface *intf;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_for_each_entry(intf, &intf_list, list)
+               mlx5_remove_device(intf, priv);
+       list_del(&priv->dev_list);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_interface(struct mlx5_interface *intf)
+{
+       struct mlx5_priv *priv;
+
+       if (!intf->add || !intf->remove)
+               return -EINVAL;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_add_tail(&intf->list, &intf_list);
+       list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+               mlx5_add_device(intf, priv);
+       mutex_unlock(&mlx5_intf_mutex);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_register_interface);
+
+void mlx5_unregister_interface(struct mlx5_interface *intf)
+{
+       struct mlx5_priv *priv;
+
+       mutex_lock(&mlx5_intf_mutex);
+       list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+               mlx5_remove_device(intf, priv);
+       list_del(&intf->list);
+       mutex_unlock(&mlx5_intf_mutex);
+}
+EXPORT_SYMBOL(mlx5_unregister_interface);
+
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+       struct mlx5_priv *priv = &mdev->priv;
+       struct mlx5_device_context *dev_ctx;
+       unsigned long flags;
+       void *result = NULL;
+
+       spin_lock_irqsave(&priv->ctx_lock, flags);
+
+       list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
+               if ((dev_ctx->intf->protocol == protocol) &&
+                   dev_ctx->intf->get_dev) {
+                       result = dev_ctx->intf->get_dev(dev_ctx->context);
+                       break;
+               }
+
+       spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+       return result;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
+/* Must be called with intf_mutex held */
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+       struct mlx5_interface *intf;
+
+       list_for_each_entry(intf, &intf_list, list)
+               if (intf->protocol == protocol) {
+                       mlx5_add_device(intf, &dev->priv);
+                       break;
+               }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+       struct mlx5_interface *intf;
+
+       list_for_each_entry(intf, &intf_list, list)
+               if (intf->protocol == protocol) {
+                       mlx5_remove_device(intf, &dev->priv);
+                       break;
+               }
+}
+
+static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+{
+       return (u16)((dev->pdev->bus->number << 8) |
+                    PCI_SLOT(dev->pdev->devfn));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+       u16 pci_id = mlx5_gen_pci_id(dev);
+       struct mlx5_core_dev *res = NULL;
+       struct mlx5_core_dev *tmp_dev;
+       struct mlx5_priv *priv;
+
+       list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
+               tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+               if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
+                       res = tmp_dev;
+                       break;
+               }
+       }
+
+       return res;
+}
+
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+                    unsigned long param)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_device_context *dev_ctx;
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->ctx_lock, flags);
+
+       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+               if (dev_ctx->intf->event)
+                       dev_ctx->intf->event(dev, dev_ctx->context, event, param);
+
+       spin_unlock_irqrestore(&priv->ctx_lock, flags);
+}
+
+void mlx5_dev_list_lock(void)
+{
+       mutex_lock(&mlx5_intf_mutex);
+}
+
+void mlx5_dev_list_unlock(void)
+{
+       mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_dev_list_trylock(void)
+{
+       return mutex_trylock(&mlx5_intf_mutex);
+}
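
dev.c centralizes the interface registry formerly kept in main.c and adds attach()/detach() as lighter-weight alternatives to add()/remove(), used around resets and PCI error recovery. A sketch of a hypothetical consumer of this registration API:

static void *example_add(struct mlx5_core_dev *dev)
{
	/* allocate and return per-device context; NULL means not attached */
	return dev;
}

static void example_remove(struct mlx5_core_dev *dev, void *context)
{
}

static struct mlx5_interface example_intf = {
	.add      = example_add,
	.remove   = example_remove,
	/* .attach/.detach are optional; without them, a detach falls back
	 * to a full remove and a later attach to a full add */
	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
};

/*
 * Module init/exit would call:
 *   mlx5_register_interface(&example_intf);
 *   mlx5_unregister_interface(&example_intf);
 */
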
index 1b495ef..7dd4763 100644 (file)
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE                0xd
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x1
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x3
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE             6  /* >= 6, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS        8  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_LOG_WQE_SZ                  17
+#define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
                                    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE               BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 #define MLX5_MPWRQ_STRIDES_PER_PAGE            (MLX5_MPWRQ_NUM_STRIDES >> \
                                                 MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \
-                                  BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW))
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
+       (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+
 #define MLX5_UMR_ALIGN                         (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (128)
 
@@ -219,9 +223,8 @@ struct mlx5e_tstamp {
 };
 
 enum {
-       MLX5E_RQ_STATE_POST_WQES_ENABLE,
+       MLX5E_RQ_STATE_FLUSH,
        MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
-       MLX5E_RQ_STATE_FLUSH_TIMEOUT,
        MLX5E_RQ_STATE_AM,
 };
 
@@ -284,26 +287,41 @@ struct mlx5e_rx_am { /* Adaptive Moderation */
        u8                                      tired;
 };
 
+/* a single cache unit can serve one napi call (for a non-striding rq)
+ * or one MPWQE (for a striding rq).
+ */
+#define MLX5E_CACHE_UNIT       (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+                                MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_SIZE       (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+struct mlx5e_page_cache {
+       u32 head;
+       u32 tail;
+       struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+};
+
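
The page cache added here is a small ring of recently released DMA pages, sized so one refill burst (a NAPI poll for the non-striding RQ, an MPWQE for the striding one) can be served without hitting the page allocator. A sketch of the ring discipline; the real producer/consumer live in en_rx.c and these helper names are hypothetical (MLX5E_CACHE_SIZE is a power of two by construction, so masking works):

static bool cache_put(struct mlx5e_page_cache *c, struct mlx5e_dma_info *di)
{
	u32 tail_next = (c->tail + 1) & (MLX5E_CACHE_SIZE - 1);

	if (tail_next == c->head)
		return false;		/* full: caller releases the page */

	c->page_cache[c->tail] = *di;
	c->tail = tail_next;
	return true;
}

static bool cache_get(struct mlx5e_page_cache *c, struct mlx5e_dma_info *di)
{
	if (c->head == c->tail)
		return false;		/* empty: caller allocates afresh */

	*di = c->page_cache[c->head];
	c->head = (c->head + 1) & (MLX5E_CACHE_SIZE - 1);
	return true;
}
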
 struct mlx5e_rq {
        /* data path */
        struct mlx5_wq_ll      wq;
        u32                    wqe_sz;
        struct sk_buff       **skb;
        struct mlx5e_mpw_info *wqe_info;
+       void                  *mtt_no_align;
        __be32                 mkey_be;
-       __be32                 umr_mkey_be;
 
        struct device         *pdev;
        struct net_device     *netdev;
        struct mlx5e_tstamp   *tstamp;
        struct mlx5e_rq_stats  stats;
        struct mlx5e_cq        cq;
+       struct mlx5e_page_cache page_cache;
+
        mlx5e_fp_handle_rx_cqe handle_rx_cqe;
        mlx5e_fp_alloc_wqe     alloc_wqe;
        mlx5e_fp_dealloc_wqe   dealloc_wqe;
 
        unsigned long          state;
        int                    ix;
+       u32                    mpwqe_mtt_offset;
 
        struct mlx5e_rx_am     am; /* Adaptive Moderation */
 
@@ -319,32 +337,15 @@ struct mlx5e_rq {
 
 struct mlx5e_umr_dma_info {
        __be64                *mtt;
-       __be64                *mtt_no_align;
        dma_addr_t             mtt_addr;
-       struct mlx5e_dma_info *dma_info;
+       struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
+       struct mlx5e_umr_wqe   wqe;
 };
 
 struct mlx5e_mpw_info {
-       union {
-               struct mlx5e_dma_info     dma_info;
-               struct mlx5e_umr_dma_info umr;
-       };
+       struct mlx5e_umr_dma_info umr;
        u16 consumed_strides;
        u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
-
-       void (*dma_pre_sync)(struct device *pdev,
-                            struct mlx5e_mpw_info *wi,
-                            u32 wqe_offset, u32 len);
-       void (*add_skb_frag)(struct mlx5e_rq *rq,
-                            struct sk_buff *skb,
-                            struct mlx5e_mpw_info *wi,
-                            u32 page_idx, u32 frag_offset, u32 len);
-       void (*copy_skb_header)(struct device *pdev,
-                               struct sk_buff *skb,
-                               struct mlx5e_mpw_info *wi,
-                               u32 page_idx, u32 offset,
-                               u32 headlen);
-       void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 };
 
 struct mlx5e_tx_wqe_info {
@@ -365,9 +366,8 @@ struct mlx5e_sq_dma {
 };
 
 enum {
-       MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
+       MLX5E_SQ_STATE_FLUSH,
        MLX5E_SQ_STATE_BF_ENABLE,
-       MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
 struct mlx5e_ico_wqe_info {
@@ -651,40 +651,6 @@ struct mlx5e_priv {
        void                      *ppriv;
 };
 
-enum mlx5e_link_mode {
-       MLX5E_1000BASE_CX_SGMII  = 0,
-       MLX5E_1000BASE_KX        = 1,
-       MLX5E_10GBASE_CX4        = 2,
-       MLX5E_10GBASE_KX4        = 3,
-       MLX5E_10GBASE_KR         = 4,
-       MLX5E_20GBASE_KR2        = 5,
-       MLX5E_40GBASE_CR4        = 6,
-       MLX5E_40GBASE_KR4        = 7,
-       MLX5E_56GBASE_R4         = 8,
-       MLX5E_10GBASE_CR         = 12,
-       MLX5E_10GBASE_SR         = 13,
-       MLX5E_10GBASE_ER         = 14,
-       MLX5E_40GBASE_SR4        = 15,
-       MLX5E_40GBASE_LR4        = 16,
-       MLX5E_50GBASE_SR2        = 18,
-       MLX5E_100GBASE_CR4       = 20,
-       MLX5E_100GBASE_SR4       = 21,
-       MLX5E_100GBASE_KR4       = 22,
-       MLX5E_100GBASE_LR4       = 23,
-       MLX5E_100BASE_TX         = 24,
-       MLX5E_1000BASE_T         = 25,
-       MLX5E_10GBASE_T          = 26,
-       MLX5E_25GBASE_CR         = 27,
-       MLX5E_25GBASE_KR         = 28,
-       MLX5E_25GBASE_SR         = 29,
-       MLX5E_50GBASE_CR2        = 30,
-       MLX5E_50GBASE_KR2        = 31,
-       MLX5E_LINK_MODES_NUMBER,
-};
-
-#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
-
-
 void mlx5e_build_ptys2ethtool_map(void);
 
 void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw);
@@ -698,30 +664,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
 void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
 
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+                       bool recycle);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
-void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
-                                   struct mlx5_cqe64 *cqe,
-                                   u16 byte_cnt,
-                                   struct mlx5e_mpw_info *wi,
-                                   struct sk_buff *skb);
-void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-                                       struct mlx5_cqe64 *cqe,
-                                       u16 byte_cnt,
-                                       struct mlx5e_mpw_info *wi,
-                                       struct sk_buff *skb);
-void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
-                               struct mlx5e_mpw_info *wi);
-void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-                                   struct mlx5e_mpw_info *wi);
+void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
+void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
 
 void mlx5e_rx_am(struct mlx5e_rq *rq);
@@ -808,15 +762,16 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
        mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc);
 }
 
-static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
+static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-       return min_t(int, mdev->priv.eq_table.num_comp_vectors,
-                    MLX5E_MAX_NUM_CHANNELS);
+       return rq->mpwqe_mtt_offset +
+               wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
-static inline int mlx5e_get_mtt_octw(int npages)
+static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
-       return ALIGN(npages, 8) / 2;
+       return min_t(int, mdev->priv.eq_table.num_comp_vectors,
+                    MLX5E_MAX_NUM_CHANNELS);
 }
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
@@ -881,9 +836,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
 int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
 void mlx5e_update_stats_work(struct work_struct *work);
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
-                         const struct mlx5e_profile *profile, void *ppriv);
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+                                      const struct mlx5e_profile *profile,
+                                      void *ppriv);
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
index 673043c..029e856 100644
@@ -60,24 +60,27 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
                             struct mlx5_core_mkey *mkey)
 {
-       struct mlx5_create_mkey_mbox_in *in;
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       void *mkc;
+       u32 *in;
        int err;
 
-       in = mlx5_vzalloc(sizeof(*in));
+       in = mlx5_vzalloc(inlen);
        if (!in)
                return -ENOMEM;
 
-       in->seg.flags = MLX5_PERM_LOCAL_WRITE |
-                       MLX5_PERM_LOCAL_READ  |
-                       MLX5_ACCESS_MODE_PA;
-       in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
-       in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
 
-       err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
-                                   NULL);
+       MLX5_SET(mkc, mkc, pd, pdn);
+       MLX5_SET(mkc, mkc, length64, 1);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
 
-       kvfree(in);
+       err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
 
+       kvfree(in);
        return err;
 }
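The rewrite above moves mkey creation from the old mailbox struct to the
auto-generated mlx5_ifc layout accessors. In rough terms (a paraphrase, not
the real macro expansion):

/* MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry) returns a
 * pointer to the mkey context inside the command buffer, and
 * MLX5_SET(mkc, mkc, lw, 1) writes the "lw" bit at the offset that
 * struct mlx5_ifc_mkc_bits defines for it, handling the endianness
 * conversion internally.  length64 = 1 replaces the old
 * MLX5_MKEY_LEN64 flag: the PA mkey covers the whole address range.
 */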
 
@@ -139,7 +142,7 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
        struct mlx5e_tir *tir;
        void *in;
        int inlen;
-       int err;
+       int err = 0;
 
        inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
        in = mlx5_vzalloc(inlen);
@@ -151,10 +154,11 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
        list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
                err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
                if (err)
-                       return err;
+                       goto out;
        }
 
+out:
        kvfree(in);
 
-       return 0;
+       return err;
 }
index caa9a3c..762af16 100644
@@ -127,29 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
        return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
 }
 
-static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+                                   struct ieee_ets *ets)
 {
        int bw_sum = 0;
        int i;
 
        /* Validate Priority */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY)
+               if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+                       netdev_err(netdev,
+                                  "Failed to validate ETS: priority value greater than max(%d)\n",
+                                   MLX5E_MAX_PRIORITY);
                        return -EINVAL;
+               }
        }
 
        /* Validate Bandwidth Sum */
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
-                       if (!ets->tc_tx_bw[i])
+                       if (!ets->tc_tx_bw[i]) {
+                               netdev_err(netdev,
+                                          "Failed to validate ETS: BW 0 is illegal\n");
                                return -EINVAL;
+                       }
 
                        bw_sum += ets->tc_tx_bw[i];
                }
        }
 
-       if (bw_sum != 0 && bw_sum != 100)
+       if (bw_sum != 0 && bw_sum != 100) {
+               netdev_err(netdev,
+                          "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
+       }
        return 0;
 }
 
@@ -159,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
        struct mlx5e_priv *priv = netdev_priv(netdev);
        int err;
 
-       err = mlx5e_dbcnl_validate_ets(ets);
+       err = mlx5e_dbcnl_validate_ets(netdev, ets);
        if (err)
                return err;
 
index 4a3757e..27ff401 100644
@@ -331,7 +331,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
        if (mlx5e_query_global_pause_combined(priv)) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
                        data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
-                                                         pport_per_prio_pfc_stats_desc, 0);
+                                                         pport_per_prio_pfc_stats_desc, i);
                }
        }
 
@@ -352,15 +352,61 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                                                                   sq_stats_desc, j);
 }
 
+static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
+                                   int num_wqe)
+{
+       int packets_per_wqe;
+       int stride_size;
+       int num_strides;
+       int wqe_size;
+
+       if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+               return num_wqe;
+
+       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       wqe_size = stride_size * num_strides;
+
+       packets_per_wqe = wqe_size /
+                         ALIGN(ETH_DATA_LEN, stride_size);
+       return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
+}
+
+static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
+                                   int num_packets)
+{
+       int packets_per_wqe;
+       int stride_size;
+       int num_strides;
+       int wqe_size;
+       int num_wqes;
+
+       if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+               return num_packets;
+
+       stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+       num_strides = 1 << priv->params.mpwqe_log_num_strides;
+       wqe_size = stride_size * num_strides;
+
+       num_packets = (1 << order_base_2(num_packets));
+
+       packets_per_wqe = wqe_size /
+                         ALIGN(ETH_DATA_LEN, stride_size);
+       num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
+       return 1 << (order_base_2(num_wqes));
+}
+
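With the default striding-RQ parameters (64-byte strides and 4096 strides per
WQE; assumed here, they are set elsewhere in the driver), the conversion works
out as follows:

/* Worked example under assumed defaults:
 *   stride_size     = 64, num_strides = 4096
 *   wqe_size        = 64 * 4096 = 262144 bytes
 *   packets_per_wqe = 262144 / ALIGN(ETH_DATA_LEN, 64)
 *                   = 262144 / 1536 = 170
 *   mlx5e_rx_wqes_to_packets(priv, STRIDING_RQ, 1024)
 *                   = 1 << (order_base_2(1024 * 170) - 1)
 *                   = 1 << 17 = 131072 packets
 * so ethtool -g now reports RX ring sizes in packets rather than WQEs.
 */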
 static void mlx5e_get_ringparam(struct net_device *dev,
                                struct ethtool_ringparam *param)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        int rq_wq_type = priv->params.rq_wq_type;
 
-       param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type);
+       param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                                        1 << mlx5_max_log_rq_size(rq_wq_type));
        param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-       param->rx_pending     = 1 << priv->params.log_rq_size;
+       param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                                    1 << priv->params.log_rq_size);
        param->tx_pending     = 1 << priv->params.log_sq_size;
 }
 
@@ -370,9 +416,13 @@ static int mlx5e_set_ringparam(struct net_device *dev,
        struct mlx5e_priv *priv = netdev_priv(dev);
        bool was_opened;
        int rq_wq_type = priv->params.rq_wq_type;
+       u32 rx_pending_wqes;
+       u32 min_rq_size;
+       u32 max_rq_size;
        u16 min_rx_wqes;
        u8 log_rq_size;
        u8 log_sq_size;
+       u32 num_mtts;
        int err = 0;
 
        if (param->rx_jumbo_pending) {
@@ -385,18 +435,36 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                            __func__);
                return -EINVAL;
        }
-       if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) {
+
+       min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                              1 << mlx5_min_log_rq_size(rq_wq_type));
+       max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+                                              1 << mlx5_max_log_rq_size(rq_wq_type));
+       rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
+                                                  param->rx_pending);
+
+       if (param->rx_pending < min_rq_size) {
                netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n",
                            __func__, param->rx_pending,
-                           1 << mlx5_min_log_rq_size(rq_wq_type));
+                           min_rq_size);
                return -EINVAL;
        }
-       if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) {
+       if (param->rx_pending > max_rq_size) {
                netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n",
                            __func__, param->rx_pending,
-                           1 << mlx5_max_log_rq_size(rq_wq_type));
+                           max_rq_size);
+               return -EINVAL;
+       }
+
+       num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
+                                      rx_pending_wqes);
+       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+           !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+               netdev_info(dev, "%s: rx_pending (%d) cannot be satisfied, try reducing it\n",
+                           __func__, param->rx_pending);
                return -EINVAL;
        }
+
        if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
                netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n",
                            __func__, param->tx_pending,
@@ -410,9 +478,9 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                return -EINVAL;
        }
 
-       log_rq_size = order_base_2(param->rx_pending);
+       log_rq_size = order_base_2(rx_pending_wqes);
        log_sq_size = order_base_2(param->tx_pending);
-       min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending);
+       min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes);
 
        if (log_rq_size == priv->params.log_rq_size &&
            log_sq_size == priv->params.log_sq_size &&
@@ -454,6 +522,7 @@ static int mlx5e_set_channels(struct net_device *dev,
        unsigned int count = ch->combined_count;
        bool arfs_enabled;
        bool was_opened;
+       u32 num_mtts;
        int err = 0;
 
        if (!count) {
@@ -472,6 +541,14 @@ static int mlx5e_set_channels(struct net_device *dev,
                return -EINVAL;
        }
 
+       num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
+       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+           !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+               netdev_info(dev, "%s: rx count (%d) cannot be satisfied, try reducing it\n",
+                           __func__, count);
+               return -EINVAL;
+       }
+
        if (priv->params.num_channels == count)
                return 0;
 
@@ -582,9 +659,10 @@ out:
 static void ptys2ethtool_supported_link(unsigned long *supported_modes,
                                        u32 eth_proto_cap)
 {
+       unsigned long proto_cap = eth_proto_cap;
        int proto;
 
-       for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+       for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
                bitmap_or(supported_modes, supported_modes,
                          ptys2ethtool_table[proto].supported,
                          __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -593,9 +671,10 @@ static void ptys2ethtool_supported_link(unsigned long *supported_modes,
 static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
                                    u32 eth_proto_cap)
 {
+       unsigned long proto_cap = eth_proto_cap;
        int proto;
 
-       for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+       for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
                bitmap_or(advertising_modes, advertising_modes,
                          ptys2ethtool_table[proto].advertised,
                          __ETHTOOL_LINK_MODE_MASK_NBITS);
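An editor's note on why the temporary variable in both helpers is a real fix
and not churn:

/* for_each_set_bit() operates on unsigned long words.  Casting
 * &eth_proto_cap (a u32) to unsigned long * made the walk read 8 bytes
 * on 64-bit kernels, i.e. 4 bytes past the variable: an out-of-bounds
 * read and, on big-endian, the wrong bits.  Copying the value into a
 * real unsigned long first makes the bitmap access well defined.
 */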
@@ -726,7 +805,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 {
        struct mlx5e_priv *priv    = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
-       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
        u32 eth_proto_cap;
        u32 eth_proto_admin;
        u32 eth_proto_lp;
@@ -736,7 +815,6 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
        int err;
 
        err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
-
        if (err) {
                netdev_err(netdev, "%s: query port ptys failed: %d\n",
                           __func__, err);
index 1587a9f..36fbc6b 100644
@@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
        return 0;
 }
 
+static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+{
+       int i;
+
+       mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+       for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+               mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+       }
+
+       if (priv->fs.vlan.filter_disabled &&
+           !(priv->netdev->flags & IFF_PROMISC))
+               mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+{
+       int i;
+
+       mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+       for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+               mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+       }
+
+       if (priv->fs.vlan.filter_disabled &&
+           !(priv->netdev->flags & IFF_PROMISC))
+               mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
 #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
        for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
                hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
@@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
        if (err)
                goto err_free_g;
 
-       err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
-       if (err)
-               goto err_destroy_vlan_flow_groups;
+       mlx5e_add_vlan_rules(priv);
 
        return 0;
 
-err_destroy_vlan_flow_groups:
-       mlx5e_destroy_groups(ft);
 err_free_g:
        kfree(ft->g);
 err_destroy_vlan_table:
@@ -1043,6 +1069,7 @@ err_destroy_vlan_table:
 
 static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
 {
+       mlx5e_del_vlan_rules(priv);
        mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
 }
 
@@ -1100,7 +1127,6 @@ err_destroy_arfs_tables:
 
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-       mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
        mlx5e_destroy_vlan_table(priv);
        mlx5e_destroy_l2_table(priv);
        mlx5e_destroy_ttc_table(priv);
index 870bea3..8595b50 100644
 #include "eswitch.h"
 #include "vxlan.h"
 
-enum {
-       MLX5_EN_QP_FLUSH_TIMEOUT_MS     = 5000,
-       MLX5_EN_QP_FLUSH_MSLEEP_QUANT   = 20,
-       MLX5_EN_QP_FLUSH_MAX_ITER       = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
-                                         MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
-};
-
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
        struct mlx5_wq_param    wq;
@@ -145,10 +138,13 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
                s->rx_wqe_err   += rq_stats->wqe_err;
                s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
-               s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
                s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
                s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
                s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+               s->rx_cache_reuse += rq_stats->cache_reuse;
+               s->rx_cache_full  += rq_stats->cache_full;
+               s->rx_cache_empty += rq_stats->cache_empty;
+               s->rx_cache_busy  += rq_stats->cache_busy;
 
                for (j = 0; j < priv->params.num_tc; j++) {
                        sq_stats = &priv->channel[i]->sq[j].stats;
@@ -162,6 +158,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                        s->tx_queue_stopped     += sq_stats->stopped;
                        s->tx_queue_wake        += sq_stats->wake;
                        s->tx_queue_dropped     += sq_stats->dropped;
+                       s->tx_xmit_more         += sq_stats->xmit_more;
                        s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
                        tx_offload_none         += sq_stats->csum_none;
                }
@@ -180,18 +177,15 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
 {
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
        u32 *out = (u32 *)priv->stats.vport.query_vport_out;
-       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
        struct mlx5_core_dev *mdev = priv->mdev;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(query_vport_counter_in, in, opcode,
                 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
        MLX5_SET(query_vport_counter_in, in, other_vport, 0);
 
        memset(out, 0, outlen);
-
        mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
@@ -304,6 +298,107 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 
+static inline int mlx5e_get_wqe_mtt_sz(void)
+{
+       /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+        * To avoid copying garbage after the mtt array, we allocate
+        * a little more.
+        */
+       return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
+                    MLX5_UMR_MTT_ALIGNMENT);
+}
+
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq,
+                                      struct mlx5e_umr_wqe *wqe, u16 ix)
+{
+       struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
+       struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+       struct mlx5_wqe_data_seg      *dseg = &wqe->data;
+       struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+       u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
+       u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
+
+       cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+                                     ds_cnt);
+       cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
+       cseg->imm       = rq->mkey_be;
+
+       ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
+       ucseg->klm_octowords =
+               cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+       ucseg->bsf_octowords =
+               cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
+       ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+
+       dseg->lkey = sq->mkey_be;
+       dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
+                                    struct mlx5e_channel *c)
+{
+       int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+       int mtt_sz = mlx5e_get_wqe_mtt_sz();
+       int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
+       int i;
+
+       rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
+                                   GFP_KERNEL, cpu_to_node(c->cpu));
+       if (!rq->wqe_info)
+               goto err_out;
+
+       /* We allocate more than mtt_sz as we will align the pointer */
+       rq->mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                                       cpu_to_node(c->cpu));
+       if (unlikely(!rq->mtt_no_align))
+               goto err_free_wqe_info;
+
+       for (i = 0; i < wq_sz; i++) {
+               struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+
+               wi->umr.mtt = PTR_ALIGN(rq->mtt_no_align + i * mtt_alloc,
+                                       MLX5_UMR_ALIGN);
+               wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
+                                                 PCI_DMA_TODEVICE);
+               if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr)))
+                       goto err_unmap_mtts;
+
+               mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
+       }
+
+       return 0;
+
+err_unmap_mtts:
+       while (--i >= 0) {
+               struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+
+               dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
+                                PCI_DMA_TODEVICE);
+       }
+       kfree(rq->mtt_no_align);
+err_free_wqe_info:
+       kfree(rq->wqe_info);
+
+err_out:
+       return -ENOMEM;
+}
+
+static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
+{
+       int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+       int mtt_sz = mlx5e_get_wqe_mtt_sz();
+       int i;
+
+       for (i = 0; i < wq_sz; i++) {
+               struct mlx5e_mpw_info *wi = &rq->wqe_info[i];
+
+               dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
+                                PCI_DMA_TODEVICE);
+       }
+       kfree(rq->mtt_no_align);
+       kfree(rq->wqe_info);
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
                           struct mlx5e_rq_param *param,
                           struct mlx5e_rq *rq)
@@ -328,22 +423,31 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
        wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 
+       rq->wq_type = priv->params.rq_wq_type;
+       rq->pdev    = c->pdev;
+       rq->netdev  = c->netdev;
+       rq->tstamp  = &priv->tstamp;
+       rq->channel = c;
+       rq->ix      = c->ix;
+       rq->priv    = c->priv;
+
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
-                                           GFP_KERNEL, cpu_to_node(c->cpu));
-               if (!rq->wqe_info) {
-                       err = -ENOMEM;
-                       goto err_rq_wq_destroy;
-               }
                rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
+               rq->mpwqe_mtt_offset = c->ix *
+                       MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
+
                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
                rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                byte_count = rq->wqe_sz;
+               rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
+               err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+               if (err)
+                       goto err_rq_wq_destroy;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
@@ -362,26 +466,21 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz);
                byte_count = rq->wqe_sz;
                byte_count |= MLX5_HW_START_PADDING;
+               rq->mkey_be = c->mkey_be;
        }
 
        for (i = 0; i < wq_sz; i++) {
                struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 
                wqe->data.byte_count = cpu_to_be32(byte_count);
+               wqe->data.lkey = rq->mkey_be;
        }
 
        INIT_WORK(&rq->am.work, mlx5e_rx_am_work);
        rq->am.mode = priv->params.rx_cq_period_mode;
 
-       rq->wq_type = priv->params.rq_wq_type;
-       rq->pdev    = c->pdev;
-       rq->netdev  = c->netdev;
-       rq->tstamp  = &priv->tstamp;
-       rq->channel = c;
-       rq->ix      = c->ix;
-       rq->priv    = c->priv;
-       rq->mkey_be = c->mkey_be;
-       rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
+       rq->page_cache.head = 0;
+       rq->page_cache.tail = 0;
 
        return 0;
 
@@ -393,14 +492,22 @@ err_rq_wq_destroy:
 
 static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 {
+       int i;
+
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               kfree(rq->wqe_info);
+               mlx5e_rq_free_mpwqe_info(rq);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                kfree(rq->skb);
        }
 
+       for (i = rq->page_cache.head; i != rq->page_cache.tail;
+            i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
+               struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
+
+               mlx5e_page_release(rq, dma_info, false);
+       }
        mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
@@ -428,7 +535,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 
        MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
        MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
-       MLX5_SET(rqc,  rqc, flush_in_error_en,  1);
        MLX5_SET(rqc,  rqc, vsd, priv->params.vlan_strip_disable);
        MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
                                                MLX5_ADAPTER_PAGE_SHIFT);
@@ -492,7 +598,8 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 
        MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
-       MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD);
+       MLX5_SET64(modify_rq_in, in, modify_bitmask,
+                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
        MLX5_SET(rqc, rqc, vsd, vsd);
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 
@@ -525,6 +632,27 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
        return -ETIMEDOUT;
 }
 
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+       struct mlx5_wq_ll *wq = &rq->wq;
+       struct mlx5e_rx_wqe *wqe;
+       __be16 wqe_ix_be;
+       u16 wqe_ix;
+
+       /* UMR WQE (if in progress) is always at wq->head */
+       if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+               mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+
+       while (!mlx5_wq_ll_is_empty(wq)) {
+               wqe_ix_be = *wq->tail_next;
+               wqe_ix    = be16_to_cpu(wqe_ix_be);
+               wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+               rq->dealloc_wqe(rq, wqe_ix);
+               mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+                              &wqe->next.next_wqe_index);
+       }
+}
+
 static int mlx5e_open_rq(struct mlx5e_channel *c,
                         struct mlx5e_rq_param *param,
                         struct mlx5e_rq *rq)
@@ -548,8 +676,6 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
        if (param->am_enabled)
                set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-       set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
-
        sq->ico_wqe_info[pi].opcode     = MLX5_OPCODE_NOP;
        sq->ico_wqe_info[pi].num_wqebbs = 1;
        mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
@@ -566,23 +692,8 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
-       int tout = 0;
-       int err;
-
-       clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
+       set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state);
        napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
-
-       err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-       while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
-              tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
-               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-
-       if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
-               set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
-
-       /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
-       napi_synchronize(&rq->channel->napi);
-
        cancel_work_sync(&rq->am.work);
 
        mlx5e_disable_rq(rq);
@@ -821,7 +932,6 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
                goto err_disable_sq;
 
        if (sq->txq) {
-               set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
                netdev_tx_reset_queue(sq->txq);
                netif_tx_start_queue(sq->txq);
        }
@@ -845,38 +955,20 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-       int tout = 0;
-       int err;
+       set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+       /* prevent netif_tx_wake_queue */
+       napi_synchronize(&sq->channel->napi);
 
        if (sq->txq) {
-               clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-               /* prevent netif_tx_wake_queue */
-               napi_synchronize(&sq->channel->napi);
                netif_tx_disable_queue(sq->txq);
 
-               /* ensure hw is notified of all pending wqes */
+               /* last doorbell out, godspeed .. */
                if (mlx5e_sq_has_room_for(sq, 1))
                        mlx5e_send_nop(sq, true);
-
-               err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
-                                     MLX5_SQC_STATE_ERR, false, 0);
-               if (err)
-                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
        }
 
-       /* wait till sq is empty, unless a TX timeout occurred on this SQ */
-       while (sq->cc != sq->pc &&
-              !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
-               msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-               if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
-                       set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
-       }
-
-       /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
-       napi_synchronize(&sq->channel->napi);
-
-       mlx5e_free_tx_descs(sq);
        mlx5e_disable_sq(sq);
+       mlx5e_free_tx_descs(sq);
        mlx5e_destroy_sq(sq);
 }
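The timeout-polling teardown is gone; a single FLUSH bit plus
napi_synchronize() now fences the data path. A sketch of the consumer side
this relies on (simplified; it assumes the completion handlers test the bit
first, as elsewhere in this series):

	/* in the napi completion path (sketch) */
	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
		return false;	/* queue is being torn down, stop processing */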
 
@@ -1826,10 +1918,6 @@ int mlx5e_open_locked(struct net_device *netdev)
        netif_set_real_num_tx_queues(netdev, num_txqs);
        netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
 
-       err = mlx5e_set_dev_port_mtu(netdev);
-       if (err)
-               goto err_clear_state_opened_flag;
-
        err = mlx5e_open_channels(priv);
        if (err) {
                netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
@@ -1908,6 +1996,9 @@ int mlx5e_close(struct net_device *netdev)
        struct mlx5e_priv *priv = netdev_priv(netdev);
        int err;
 
+       if (!netif_device_present(netdev))
+               return -ENODEV;
+
        mutex_lock(&priv->state_lock);
        err = mlx5e_close_locked(netdev);
        mutex_unlock(&priv->state_lock);
@@ -2022,14 +2113,15 @@ static void mlx5e_close_drop_rq(struct mlx5e_priv *priv)
 static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+       u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
        void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(tisc, tisc, prio, tc << 1);
        MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
 
+       if (mlx5_lag_is_lacp_owner(mdev))
+               MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+
        return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]);
 }
 
@@ -2573,6 +2665,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
        u16 max_mtu;
        u16 min_mtu;
        int err = 0;
+       bool reset;
 
        mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
@@ -2588,13 +2681,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 
        mutex_lock(&priv->state_lock);
 
+       reset = !priv->params.lro_en &&
+               (priv->params.rq_wq_type !=
+                MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-       if (was_opened)
+       if (was_opened && reset)
                mlx5e_close_locked(netdev);
 
        netdev->mtu = new_mtu;
+       mlx5e_set_dev_port_mtu(netdev);
 
-       if (was_opened)
+       if (was_opened && reset)
                err = mlx5e_open_locked(netdev);
 
        mutex_unlock(&priv->state_lock);
@@ -2794,7 +2892,7 @@ static void mlx5e_tx_timeout(struct net_device *dev)
                if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
                        continue;
                sched_work = true;
-               set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+               set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
                netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
                           i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
        }
@@ -3228,35 +3326,37 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
 static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5_create_mkey_mbox_in *in;
-       struct mlx5_mkey_seg *mkc;
-       int inlen = sizeof(*in);
-       u64 npages =
-               priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
+       u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
+                                        BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       void *mkc;
+       u32 *in;
        int err;
 
        in = mlx5_vzalloc(inlen);
        if (!in)
                return -ENOMEM;
 
-       mkc = &in->seg;
-       mkc->status = MLX5_MKEY_STATUS_FREE;
-       mkc->flags = MLX5_PERM_UMR_EN |
-                    MLX5_PERM_LOCAL_READ |
-                    MLX5_PERM_LOCAL_WRITE |
-                    MLX5_ACCESS_MODE_MTT;
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-       mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-       mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn);
-       mkc->len = cpu_to_be64(npages << PAGE_SHIFT);
-       mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages));
-       mkc->log2_page_size = PAGE_SHIFT;
+       npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
 
-       err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL,
-                                   NULL, NULL);
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
 
-       kvfree(in);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+       MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
+       MLX5_SET(mkc, mkc, translations_octword_size,
+                MLX5_MTT_OCTW(npages));
+       MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+       err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
 
+       kvfree(in);
        return err;
 }
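For scale, the min_t() clamp above bounds the UMR mkey as follows (again
assuming 4 KB pages):

/* npages <= ALIGN(U16_MAX, 4) * 2 = 65536 * 2 = 131072 pages
 * len     = 131072 << 12          = 512 MB of MTT-mapped RX buffers
 * MLX5_MTT_OCTW(131072)           = 65536 translation octowords
 */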
 
@@ -3375,6 +3475,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
        struct mlx5_eswitch_rep rep;
 
+       mlx5_lag_add(mdev, netdev);
+
        if (mlx5e_vxlan_allowed(mdev)) {
                rtnl_lock();
                udp_tunnel_get_rx_info(netdev);
@@ -3385,6 +3487,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
        queue_work(priv->wq, &priv->set_rx_mode_work);
 
        if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+               mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
                rep.load = mlx5e_nic_rep_load;
                rep.unload = mlx5e_nic_rep_unload;
                rep.vport = 0;
@@ -3397,6 +3500,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 {
        queue_work(priv->wq, &priv->set_rx_mode_work);
        mlx5e_disable_async_events(priv);
+       mlx5_lag_remove(priv->mdev);
 }
 
 static const struct mlx5e_profile mlx5e_nic_profile = {
@@ -3413,13 +3517,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
        .max_tc            = MLX5E_MAX_NUM_TC,
 };
 
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
-                         const struct mlx5e_profile *profile, void *ppriv)
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+                                      const struct mlx5e_profile *profile,
+                                      void *ppriv)
 {
+       int nch = profile->max_nch(mdev);
        struct net_device *netdev;
        struct mlx5e_priv *priv;
-       int nch = profile->max_nch(mdev);
-       int err;
 
        netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
                                    nch * profile->max_tc,
@@ -3437,12 +3541,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
        priv->wq = create_singlethread_workqueue("mlx5e");
        if (!priv->wq)
-               goto err_free_netdev;
+               goto err_cleanup_nic;
+
+       return netdev;
+
+err_cleanup_nic:
+       profile->cleanup(priv);
+       free_netdev(netdev);
+
+       return NULL;
+}
+
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+       const struct mlx5e_profile *profile;
+       struct mlx5e_priv *priv;
+       int err;
+
+       priv = netdev_priv(netdev);
+       profile = priv->profile;
+       clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
        err = mlx5e_create_umr_mkey(priv);
        if (err) {
                mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-               goto err_destroy_wq;
+               goto out;
        }
 
        err = profile->init_tx(priv);
@@ -3463,20 +3586,18 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 
        mlx5e_init_l2_addr(priv);
 
-       err = register_netdev(netdev);
-       if (err) {
-               mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-               goto err_dealloc_q_counters;
-       }
+       mlx5e_set_dev_port_mtu(netdev);
 
        if (profile->enable)
                profile->enable(priv);
 
-       return priv;
+       rtnl_lock();
+       if (netif_running(netdev))
+               mlx5e_open(netdev);
+       netif_device_attach(netdev);
+       rtnl_unlock();
 
-err_dealloc_q_counters:
-       mlx5e_destroy_q_counter(priv);
-       profile->cleanup_rx(priv);
+       return 0;
 
 err_close_drop_rq:
        mlx5e_close_drop_rq(priv);
@@ -3487,13 +3608,8 @@ err_cleanup_tx:
 err_destroy_umr_mkey:
        mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
 
-err_destroy_wq:
-       destroy_workqueue(priv->wq);
-
-err_free_netdev:
-       free_netdev(netdev);
-
-       return NULL;
+out:
+       return err;
 }
 
 static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
@@ -3501,30 +3617,98 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
        int total_vfs = MLX5_TOTAL_VPORTS(mdev);
        int vport;
+       u8 mac[ETH_ALEN];
 
        if (!MLX5_CAP_GEN(mdev, vport_group_manager))
                return;
 
+       mlx5_query_nic_vport_mac_address(mdev, 0, mac);
+
        for (vport = 1; vport < total_vfs; vport++) {
                struct mlx5_eswitch_rep rep;
 
                rep.load = mlx5e_vport_rep_load;
                rep.unload = mlx5e_vport_rep_unload;
                rep.vport = vport;
+               ether_addr_copy(rep.hw_id, mac);
                mlx5_eswitch_register_vport_rep(esw, &rep);
        }
 }
 
+void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       const struct mlx5e_profile *profile = priv->profile;
+
+       set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+       if (profile->disable)
+               profile->disable(priv);
+
+       flush_workqueue(priv->wq);
+
+       rtnl_lock();
+       if (netif_running(netdev))
+               mlx5e_close(netdev);
+       netif_device_detach(netdev);
+       rtnl_unlock();
+
+       mlx5e_destroy_q_counter(priv);
+       profile->cleanup_rx(priv);
+       mlx5e_close_drop_rq(priv);
+       profile->cleanup_tx(priv);
+       mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
+       cancel_delayed_work_sync(&priv->update_stats_work);
+}
+
+/* The scope of mlx5e_attach and mlx5e_detach is limited to creating and
+ * destroying hardware contexts and connecting them to the current netdev.
+ */
+static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+       struct mlx5e_priv *priv = vpriv;
+       struct net_device *netdev = priv->netdev;
+       int err;
+
+       if (netif_device_present(netdev))
+               return 0;
+
+       err = mlx5e_create_mdev_resources(mdev);
+       if (err)
+               return err;
+
+       err = mlx5e_attach_netdev(mdev, netdev);
+       if (err) {
+               mlx5e_destroy_mdev_resources(mdev);
+               return err;
+       }
+
+       return 0;
+}
+
+static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+       struct mlx5e_priv *priv = vpriv;
+       struct net_device *netdev = priv->netdev;
+
+       if (!netif_device_present(netdev))
+               return;
+
+       mlx5e_detach_netdev(mdev, netdev);
+       mlx5e_destroy_mdev_resources(mdev);
+}
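Taken together, the hunks above give the driver this lifecycle (summarized
from the diff, not new behavior):

/* mlx5e_add()    -> mlx5e_create_netdev() + mlx5e_attach()
 *                   + register_netdev()
 * mlx5e_attach() -> create mdev resources, attach netdev (UMR mkey,
 *                   TX/RX init, q counters), netif_device_attach()
 * mlx5e_detach() -> detach netdev (close, netif_device_detach(),
 *                   tear down HW contexts), destroy mdev resources
 * mlx5e_remove() -> unregister eswitch reps, mlx5e_detach(),
 *                   mlx5e_destroy_netdev()
 */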
+
 static void *mlx5e_add(struct mlx5_core_dev *mdev)
 {
        struct mlx5_eswitch *esw = mdev->priv.eswitch;
+       int total_vfs = MLX5_TOTAL_VPORTS(mdev);
        void *ppriv = NULL;
-       void *ret;
-
-       if (mlx5e_check_required_hca_cap(mdev))
-               return NULL;
+       void *priv;
+       int vport;
+       int err;
+       struct net_device *netdev;
 
-       if (mlx5e_create_mdev_resources(mdev))
+       err = mlx5e_check_required_hca_cap(mdev);
+       if (err)
                return NULL;
 
        mlx5e_register_vport_rep(mdev);
@@ -3532,12 +3716,39 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
        if (MLX5_CAP_GEN(mdev, vport_group_manager))
                ppriv = &esw->offloads.vport_reps[0];
 
-       ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
-       if (!ret) {
-               mlx5e_destroy_mdev_resources(mdev);
-               return NULL;
+       netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
+       if (!netdev) {
+               mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
+               goto err_unregister_reps;
+       }
+
+       priv = netdev_priv(netdev);
+
+       err = mlx5e_attach(mdev, priv);
+       if (err) {
+               mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
+               goto err_destroy_netdev;
        }
-       return ret;
+
+       err = register_netdev(netdev);
+       if (err) {
+               mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
+               goto err_detach;
+       }
+
+       return priv;
+
+err_detach:
+       mlx5e_detach(mdev, priv);
+
+err_destroy_netdev:
+       mlx5e_destroy_netdev(mdev, priv);
+
+err_unregister_reps:
+       for (vport = 1; vport < total_vfs; vport++)
+               mlx5_eswitch_unregister_vport_rep(esw, vport);
+
+       return NULL;
 }
 
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
@@ -3545,30 +3756,11 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
        const struct mlx5e_profile *profile = priv->profile;
        struct net_device *netdev = priv->netdev;
 
-       set_bit(MLX5E_STATE_DESTROYING, &priv->state);
-       if (profile->disable)
-               profile->disable(priv);
-
-       flush_workqueue(priv->wq);
-       if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
-               netif_device_detach(netdev);
-               mlx5e_close(netdev);
-       } else {
-               unregister_netdev(netdev);
-       }
-
-       mlx5e_destroy_q_counter(priv);
-       profile->cleanup_rx(priv);
-       mlx5e_close_drop_rq(priv);
-       profile->cleanup_tx(priv);
-       mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
-       cancel_delayed_work_sync(&priv->update_stats_work);
+       unregister_netdev(netdev);
        destroy_workqueue(priv->wq);
        if (profile->cleanup)
                profile->cleanup(priv);
-
-       if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state))
-               free_netdev(netdev);
+       free_netdev(netdev);
 }
 
 static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
@@ -3578,12 +3770,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
        struct mlx5e_priv *priv = vpriv;
        int vport;
 
-       mlx5e_destroy_netdev(mdev, priv);
-
        for (vport = 1; vport < total_vfs; vport++)
                mlx5_eswitch_unregister_vport_rep(esw, vport);
 
-       mlx5e_destroy_mdev_resources(mdev);
+       mlx5e_detach(mdev, vpriv);
+       mlx5e_destroy_netdev(mdev, priv);
 }
 
 static void *mlx5e_get_netdev(void *vpriv)
@@ -3596,6 +3787,8 @@ static void *mlx5e_get_netdev(void *vpriv)
 static struct mlx5_interface mlx5e_interface = {
        .add       = mlx5e_add,
        .remove    = mlx5e_remove,
+       .attach    = mlx5e_attach,
+       .detach    = mlx5e_detach,
        .event     = mlx5e_async_event,
        .protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
        .get_dev   = mlx5e_get_netdev,
index 1c7d8b8..3c97da1 100644
@@ -135,17 +135,16 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       u8 mac[ETH_ALEN];
 
        if (esw->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-               mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac);
                attr->u.ppid.id_len = ETH_ALEN;
-               memcpy(&attr->u.ppid.id, &mac, ETH_ALEN);
+               ether_addr_copy(attr->u.ppid.id, rep->hw_id);
                break;
        default:
                return -EOPNOTSUPP;
@@ -414,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = {
 int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
                         struct mlx5_eswitch_rep *rep)
 {
-       rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
-       if (!rep->priv_data) {
-               pr_warn("Failed to create representor for vport %d\n",
+       struct net_device *netdev;
+       int err;
+
+       netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
+       if (!netdev) {
+               pr_warn("Failed to create representor netdev for vport %d\n",
                        rep->vport);
                return -EINVAL;
        }
+
+       rep->priv_data = netdev_priv(netdev);
+
+       err = mlx5e_attach_netdev(esw->dev, netdev);
+       if (err) {
+               pr_warn("Failed to attach representor netdev for vport %d\n",
+                       rep->vport);
+               goto err_destroy_netdev;
+       }
+
+       err = register_netdev(netdev);
+       if (err) {
+               pr_warn("Failed to register representor netdev for vport %d\n",
+                       rep->vport);
+               goto err_detach_netdev;
+       }
+
        return 0;
+
+err_detach_netdev:
+       mlx5e_detach_netdev(esw->dev, netdev);
+
+err_destroy_netdev:
+       mlx5e_destroy_netdev(esw->dev, rep->priv_data);
+
+       return err;
 }
 
 void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
                            struct mlx5_eswitch_rep *rep)
 {
        struct mlx5e_priv *priv = rep->priv_data;
+       struct net_device *netdev = priv->netdev;
 
+       mlx5e_detach_netdev(esw->dev, netdev);
        mlx5e_destroy_netdev(esw->dev, priv);
 }
index 9f2a16a..dc86779 100644
@@ -200,7 +200,6 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 
        *((dma_addr_t *)skb->cb) = dma_addr;
        wqe->data.addr = cpu_to_be64(dma_addr);
-       wqe->data.lkey = rq->mkey_be;
 
        rq->skb[ix] = skb;
 
@@ -231,44 +230,11 @@ static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
        return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
 }
 
-static inline void
-mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
-                               struct mlx5e_mpw_info *wi,
-                               u32 wqe_offset, u32 len)
-{
-       dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset,
-                               len, DMA_FROM_DEVICE);
-}
-
-static inline void
-mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
-                                   struct mlx5e_mpw_info *wi,
-                                   u32 wqe_offset, u32 len)
-{
-       /* No dma pre sync for fragmented MPWQE */
-}
-
-static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
-                               struct sk_buff *skb,
-                               struct mlx5e_mpw_info *wi,
-                               u32 page_idx, u32 frag_offset,
-                               u32 len)
-{
-       unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
-
-       wi->skbs_frags[page_idx]++;
-       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-                       &wi->dma_info.page[page_idx], frag_offset,
-                       len, truesize);
-}
-
-static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
-                                   struct sk_buff *skb,
-                                   struct mlx5e_mpw_info *wi,
-                                   u32 page_idx, u32 frag_offset,
-                                   u32 len)
+static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
+                                           struct sk_buff *skb,
+                                           struct mlx5e_mpw_info *wi,
+                                           u32 page_idx, u32 frag_offset,
+                                           u32 len)
 {
        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
@@ -282,24 +248,11 @@ mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
 }
 
 static inline void
-mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev,
-                                  struct sk_buff *skb,
-                                  struct mlx5e_mpw_info *wi,
-                                  u32 page_idx, u32 offset,
-                                  u32 headlen)
-{
-       struct page *page = &wi->dma_info.page[page_idx];
-
-       skb_copy_to_linear_data(skb, page_address(page) + offset,
-                               ALIGN(headlen, sizeof(long)));
-}
-
-static inline void
-mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
-                                      struct sk_buff *skb,
-                                      struct mlx5e_mpw_info *wi,
-                                      u32 page_idx, u32 offset,
-                                      u32 headlen)
+mlx5e_copy_skb_header_mpwqe(struct device *pdev,
+                           struct sk_buff *skb,
+                           struct mlx5e_mpw_info *wi,
+                           u32 page_idx, u32 offset,
+                           u32 headlen)
 {
        u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
        struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
@@ -324,46 +277,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
        }
 }
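
The SKB linear part may straddle a page boundary, so the header copy is split: headlen_pg bytes come from the current page and the rest from the next one. A quick arithmetic sketch with hypothetical numbers:

	u32 offset     = 4090, headlen = 128;	/* PAGE_SIZE = 4096 */
	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);	/* 6 */
	u32 remaining  = headlen - headlen_pg;	/* 122, copied from the next page */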
 
-static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
+static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 {
-       return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS +
-               wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
-}
-
-static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
-                               struct mlx5e_sq *sq,
-                               struct mlx5e_umr_wqe *wqe,
-                               u16 ix)
-{
-       struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
-       struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
-       struct mlx5_wqe_data_seg      *dseg = &wqe->data;
        struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-       u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
-       u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix);
-
-       memset(wqe, 0, sizeof(*wqe));
-       cseg->opmod_idx_opcode =
-               cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
-                           MLX5_OPCODE_UMR);
-       cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
-                                     ds_cnt);
-       cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
-       cseg->imm       = rq->umr_mkey_be;
-
-       ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
-       ucseg->klm_octowords =
-               cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE));
-       ucseg->bsf_octowords =
-               cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset));
-       ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-
-       dseg->lkey = sq->mkey_be;
-       dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
-}
-
-static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
-{
        struct mlx5e_sq *sq = &rq->channel->icosq;
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *wqe;
@@ -378,136 +294,153 @@ static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
        }
 
        wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-       mlx5e_build_umr_wqe(rq, sq, wqe, ix);
+       memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
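+       /* The WQE body is copied from a pre-built template; only
+        * opmod_idx_opcode is patched per post, since it encodes the
+        * current producer counter (sq->pc).
+        */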
+       wqe->ctrl.opmod_idx_opcode =
+               cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+                           MLX5_OPCODE_UMR);
+
        sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR;
        sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs;
        sq->pc += num_wqebbs;
        mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
 }
 
-static inline int mlx5e_get_wqe_mtt_sz(void)
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+                                     struct mlx5e_dma_info *dma_info)
+{
+       struct mlx5e_page_cache *cache = &rq->page_cache;
+       u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+
+       if (tail_next == cache->head) {
+               rq->stats.cache_full++;
+               return false;
+       }
+
+       cache->page_cache[cache->tail] = *dma_info;
+       cache->tail = tail_next;
+       return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+                                     struct mlx5e_dma_info *dma_info)
 {
-       /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
-        * To avoid copying garbage after the mtt array, we allocate
-        * a little more.
-        */
-       return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
-                    MLX5_UMR_MTT_ALIGNMENT);
+       struct mlx5e_page_cache *cache = &rq->page_cache;
+
+       if (unlikely(cache->head == cache->tail)) {
+               rq->stats.cache_empty++;
+               return false;
+       }
+
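+       /* A page refcount of 1 means the stack has dropped all of its
+        * references, so the entry at head can be safely reused.
+        */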
+       if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+               rq->stats.cache_busy++;
+               return false;
+       }
+
+       *dma_info = cache->page_cache[cache->head];
+       cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+       rq->stats.cache_reuse++;
+
+       dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+                                  DMA_FROM_DEVICE);
+       return true;
 }
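
The new page cache is a single-producer ring over power-of-two indices: head == tail means empty, and one slot is left unused so that a full ring (tail_next == head) stays distinguishable from an empty one. A minimal kernel-style sketch of the same arithmetic, with CACHE_SIZE standing in for MLX5E_CACHE_SIZE:

	#define CACHE_SIZE 128			/* power of two, like MLX5E_CACHE_SIZE */

	struct ring {
		u32 head;			/* next entry to consume */
		u32 tail;			/* next free slot */
		void *slot[CACHE_SIZE];
	};

	static bool ring_put(struct ring *r, void *p)
	{
		u32 tail_next = (r->tail + 1) & (CACHE_SIZE - 1);

		if (tail_next == r->head)	/* full */
			return false;
		r->slot[r->tail] = p;
		r->tail = tail_next;
		return true;
	}

	static bool ring_get(struct ring *r, void **p)
	{
		if (r->head == r->tail)		/* empty */
			return false;
		*p = r->slot[r->head];
		r->head = (r->head + 1) & (CACHE_SIZE - 1);
		return true;
	}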
 
-static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq,
-                                   struct mlx5e_mpw_info *wi,
-                                   int i)
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+                                         struct mlx5e_dma_info *dma_info)
 {
        struct page *page;
 
+       if (mlx5e_rx_cache_get(rq, dma_info))
+               return 0;
+
        page = dev_alloc_page();
        if (unlikely(!page))
                return -ENOMEM;
 
-       wi->umr.dma_info[i].page = page;
-       wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
-                                               PCI_DMA_FROMDEVICE);
-       if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) {
+       dma_info->page = page;
+       dma_info->addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
+                                     DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
                put_page(page);
                return -ENOMEM;
        }
-       wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR);
 
        return 0;
 }
 
-static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-                                          struct mlx5e_rx_wqe *wqe,
-                                          u16 ix)
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+                       bool recycle)
 {
-       struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-       int mtt_sz = mlx5e_get_wqe_mtt_sz();
-       u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT;
-       int i;
-
-       wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) *
-                                  MLX5_MPWRQ_PAGES_PER_WQE,
-                                  GFP_ATOMIC);
-       if (unlikely(!wi->umr.dma_info))
-               goto err_out;
+       if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
+               return;
 
-       /* We allocate more than mtt_sz as we will align the pointer */
-       wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1,
-                                      GFP_ATOMIC);
-       if (unlikely(!wi->umr.mtt_no_align))
-               goto err_free_umr;
+       dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, DMA_FROM_DEVICE);
+       put_page(dma_info->page);
+}
 
-       wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN);
-       wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz,
-                                         PCI_DMA_TODEVICE);
-       if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr)))
-               goto err_free_mtt;
+static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
+                                   struct mlx5e_rx_wqe *wqe,
+                                   u16 ix)
+{
+       struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+       u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
+       int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+       int err;
+       int i;
 
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
+               struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
+
+               err = mlx5e_page_alloc_mapped(rq, dma_info);
+               if (unlikely(err))
                        goto err_unmap;
-               page_ref_add(wi->umr.dma_info[i].page,
-                            mlx5e_mpwqe_strides_per_page(rq));
+               wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+               page_ref_add(dma_info->page, pg_strides);
                wi->skbs_frags[i] = 0;
        }
 
        wi->consumed_strides = 0;
-       wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe;
-       wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe;
-       wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe;
-       wi->free_wqe     = mlx5e_free_rx_fragmented_mpwqe;
-       wqe->data.lkey = rq->umr_mkey_be;
        wqe->data.addr = cpu_to_be64(dma_offset);
 
        return 0;
 
 err_unmap:
        while (--i >= 0) {
-               dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
-                              PCI_DMA_FROMDEVICE);
-               page_ref_sub(wi->umr.dma_info[i].page,
-                            mlx5e_mpwqe_strides_per_page(rq));
-               put_page(wi->umr.dma_info[i].page);
-       }
-       dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
+               struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
 
-err_free_mtt:
-       kfree(wi->umr.mtt_no_align);
-
-err_free_umr:
-       kfree(wi->umr.dma_info);
+               page_ref_sub(dma_info->page, pg_strides);
+               mlx5e_page_release(rq, dma_info, true);
+       }
 
-err_out:
-       return -ENOMEM;
+       return err;
 }
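
The err_unmap label relies on the usual partial-rollback idiom: on failure, i indexes the first page that could not be set up, so while (--i >= 0) walks back over exactly the pages that succeeded. The same shape in isolation (init_one()/undo_one() are hypothetical helpers):

	for (i = 0; i < n; i++) {
		err = init_one(i);
		if (err)
			goto err_undo;
	}
	return 0;

	err_undo:
	while (--i >= 0)		/* only the entries that succeeded */
		undo_one(i);
	return err;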
 
-void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-                                   struct mlx5e_mpw_info *wi)
+void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
-       int mtt_sz = mlx5e_get_wqe_mtt_sz();
+       int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
        int i;
 
        for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
-                              PCI_DMA_FROMDEVICE);
-               page_ref_sub(wi->umr.dma_info[i].page,
-                       mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
-               put_page(wi->umr.dma_info[i].page);
+               struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
+
+               page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
+               mlx5e_page_release(rq, dma_info, true);
        }
-       dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
-       kfree(wi->umr.mtt_no_align);
-       kfree(wi->umr.dma_info);
 }
 
-void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
+void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 {
        struct mlx5_wq_ll *wq = &rq->wq;
        struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
        clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+
+       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
+               mlx5e_free_rx_mpwqe(rq, &rq->wqe_info[wq->head]);
+               return;
+       }
+
        mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
-       rq->stats.mpwqe_frag++;
 
        /* ensure wqes are visible to device before updating doorbell record */
        dma_wmb();
@@ -515,106 +448,28 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
        mlx5_wq_ll_update_db_record(wq);
 }
 
-static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
-                                      struct mlx5e_rx_wqe *wqe,
-                                      u16 ix)
-{
-       struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-       gfp_t gfp_mask;
-       int i;
-
-       gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC;
-       wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-                                            MLX5_MPWRQ_WQE_PAGE_ORDER);
-       if (unlikely(!wi->dma_info.page))
-               return -ENOMEM;
-
-       wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0,
-                                        rq->wqe_sz, PCI_DMA_FROMDEVICE);
-       if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) {
-               put_page(wi->dma_info.page);
-               return -ENOMEM;
-       }
-
-       /* We split the high-order page into order-0 ones and manage their
-        * reference counter to minimize the memory held by small skb fragments
-        */
-       split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
-       for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               page_ref_add(&wi->dma_info.page[i],
-                            mlx5e_mpwqe_strides_per_page(rq));
-               wi->skbs_frags[i] = 0;
-       }
-
-       wi->consumed_strides = 0;
-       wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe;
-       wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe;
-       wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe;
-       wi->free_wqe     = mlx5e_free_rx_linear_mpwqe;
-       wqe->data.lkey = rq->mkey_be;
-       wqe->data.addr = cpu_to_be64(wi->dma_info.addr);
-
-       return 0;
-}
-
-void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
-                               struct mlx5e_mpw_info *wi)
-{
-       int i;
-
-       dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
-                      PCI_DMA_FROMDEVICE);
-       for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-               page_ref_sub(&wi->dma_info.page[i],
-                       mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
-               put_page(&wi->dma_info.page[i]);
-       }
-}
-
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,        u16 ix)
 {
        int err;
 
-       err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix);
-       if (unlikely(err)) {
-               err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix);
-               if (unlikely(err))
-                       return err;
-               set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
-               mlx5e_post_umr_wqe(rq, ix);
-               return -EBUSY;
-       }
-
-       return 0;
+       err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
+       if (unlikely(err))
+               return err;
+       set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+       mlx5e_post_umr_wqe(rq, ix);
+       return -EBUSY;
 }
 
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
 
-       wi->free_wqe(rq, wi);
-}
-
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
-{
-       struct mlx5_wq_ll *wq = &rq->wq;
-       struct mlx5e_rx_wqe *wqe;
-       __be16 wqe_ix_be;
-       u16 wqe_ix;
-
-       while (!mlx5_wq_ll_is_empty(wq)) {
-               wqe_ix_be = *wq->tail_next;
-               wqe_ix    = be16_to_cpu(wqe_ix_be);
-               wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
-               rq->dealloc_wqe(rq, wqe_ix);
-               mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
-                              &wqe->next.next_wqe_index);
-       }
+       mlx5e_free_rx_mpwqe(rq, wi);
 }
 
 #define RQ_CANNOT_POST(rq) \
-               (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
-                test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+       (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \
+        test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
@@ -628,9 +483,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
                int err;
 
                err = rq->alloc_wqe(rq, wqe, wq->head);
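+               /* -EBUSY: a UMR WQE was posted and the RQ waits for its
+                * completion; this is not an allocation failure.
+                */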
+               if (err == -EBUSY)
+                       return true;
                if (unlikely(err)) {
-                       if (err != -EBUSY)
-                               rq->stats.buff_alloc_err++;
+                       rq->stats.buff_alloc_err++;
                        break;
                }
 
@@ -648,24 +504,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                                 u32 cqe_bcnt)
 {
-       struct ethhdr   *eth    = (struct ethhdr *)(skb->data);
-       struct iphdr    *ipv4   = (struct iphdr *)(skb->data + ETH_HLEN);
-       struct ipv6hdr  *ipv6   = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+       struct ethhdr   *eth = (struct ethhdr *)(skb->data);
+       struct iphdr    *ipv4;
+       struct ipv6hdr  *ipv6;
        struct tcphdr   *tcp;
+       int network_depth = 0;
+       __be16 proto;
+       u16 tot_len;
 
        u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
        int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
                       (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
 
-       u16 tot_len = cqe_bcnt - ETH_HLEN;
+       skb->mac_len = ETH_HLEN;
+       proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
+
+       ipv4 = (struct iphdr *)(skb->data + network_depth);
+       ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
+       tot_len = cqe_bcnt - network_depth;
 
-       if (eth->h_proto == htons(ETH_P_IP)) {
-               tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+       if (proto == htons(ETH_P_IP)) {
+               tcp = (struct tcphdr *)(skb->data + network_depth +
                                        sizeof(struct iphdr));
                ipv6 = NULL;
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
        } else {
-               tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+               tcp = (struct tcphdr *)(skb->data + network_depth +
                                        sizeof(struct ipv6hdr));
                ipv4 = NULL;
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
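
Resolving the real network-header offset via __vlan_get_protocol() makes the LRO header rewrite correct for VLAN-tagged frames too. The offset arithmetic, with nr_tags as a hypothetical tag count:

	/* untagged: 14 (ETH_HLEN); one 802.1Q tag: 14 + 4 (VLAN_HLEN) = 18 */
	int network_depth = ETH_HLEN + nr_tags * VLAN_HLEN;
	u16 tot_len       = cqe_bcnt - network_depth;	/* L3 length only */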
@@ -834,7 +698,6 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                           u32 cqe_bcnt,
                                           struct sk_buff *skb)
 {
-       u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
        u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
        u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
        u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
@@ -848,21 +711,20 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                page_idx++;
                frag_offset -= PAGE_SIZE;
        }
-       wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
        while (byte_cnt) {
                u32 pg_consumed_bytes =
                        min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-               wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
-                                pg_consumed_bytes);
+               mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset,
+                                        pg_consumed_bytes);
                byte_cnt -= pg_consumed_bytes;
                frag_offset = 0;
                page_idx++;
        }
        /* copy header */
-       wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset,
-                           headlen);
+       mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx,
+                                   head_offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
@@ -907,7 +769,7 @@ mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
                return;
 
-       wi->free_wqe(rq, wi);
+       mlx5e_free_rx_mpwqe(rq, wi);
        mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
@@ -916,7 +778,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
        int work_done = 0;
 
-       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+       if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
                return 0;
 
        if (cq->decmprs_left)
index 7b9d8a9..6af8d79 100644
@@ -70,12 +70,16 @@ struct mlx5e_sw_stats {
        u64 tx_queue_stopped;
        u64 tx_queue_wake;
        u64 tx_queue_dropped;
+       u64 tx_xmit_more;
        u64 rx_wqe_err;
        u64 rx_mpwqe_filler;
-       u64 rx_mpwqe_frag;
        u64 rx_buff_alloc_err;
        u64 rx_cqe_compress_blks;
        u64 rx_cqe_compress_pkts;
+       u64 rx_cache_reuse;
+       u64 rx_cache_full;
+       u64 rx_cache_empty;
+       u64 rx_cache_busy;
 
        /* Special handling counters */
        u64 link_down_events_phy;
@@ -101,12 +105,16 @@ static const struct counter_desc sw_stats_desc[] = {
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
-       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
@@ -272,10 +280,13 @@ struct mlx5e_rq_stats {
        u64 lro_bytes;
        u64 wqe_err;
        u64 mpwqe_filler;
-       u64 mpwqe_frag;
        u64 buff_alloc_err;
        u64 cqe_compress_blks;
        u64 cqe_compress_pkts;
+       u64 cache_reuse;
+       u64 cache_full;
+       u64 cache_empty;
+       u64 cache_busy;
 };
 
 static const struct counter_desc rq_stats_desc[] = {
@@ -288,16 +299,20 @@ static const struct counter_desc rq_stats_desc[] = {
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
 };
 
 struct mlx5e_sq_stats {
        /* commonly accessed in data path */
        u64 packets;
        u64 bytes;
+       u64 xmit_more;
        u64 tso_packets;
        u64 tso_bytes;
        u64 tso_inner_packets;
@@ -324,6 +339,7 @@ static const struct counter_desc sq_stats_desc[] = {
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
        { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+       { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
 };
 
 #define NUM_SW_COUNTERS                        ARRAY_SIZE(sw_stats_desc)
index dc8b1cb..22cfc4a 100644
@@ -170,7 +170,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
-                                                 FLOW_DISSECTOR_KEY_BASIC,
+                                                 FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->key);
                addr_type = key->addr_type;
        }
index e073bf5..eb0e725 100644
@@ -356,6 +356,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
                sq->stats.stopped++;
        }
 
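+       /* skb->xmit_more is a single bit, so this sums to the number of
+        * skbs sent with the doorbell deferred.
+        */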
+       sq->stats.xmit_more += skb->xmit_more;
        if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
                int bf_sz = 0;
 
@@ -394,35 +395,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        return mlx5e_sq_xmit(sq, skb);
 }
 
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
-{
-       struct mlx5e_tx_wqe_info *wi;
-       struct sk_buff *skb;
-       u16 ci;
-       int i;
-
-       while (sq->cc != sq->pc) {
-               ci = sq->cc & sq->wq.sz_m1;
-               skb = sq->skb[ci];
-               wi = &sq->wqe_info[ci];
-
-               if (!skb) { /* nop */
-                       sq->cc++;
-                       continue;
-               }
-
-               for (i = 0; i < wi->num_dma; i++) {
-                       struct mlx5e_sq_dma *dma =
-                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
-
-                       mlx5e_tx_dma_unmap(sq->pdev, dma);
-               }
-
-               dev_kfree_skb_any(skb);
-               sq->cc += wi->num_wqebbs;
-       }
-}
-
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_sq *sq;
@@ -434,7 +406,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
        sq = container_of(cq, struct mlx5e_sq, cq);
 
-       if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+       if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
                return false;
 
        npkts = 0;
@@ -512,11 +484,39 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
        netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
        if (netif_tx_queue_stopped(sq->txq) &&
-           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) &&
-           likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) {
-                               netif_tx_wake_queue(sq->txq);
-                               sq->stats.wake++;
+           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) {
+               netif_tx_wake_queue(sq->txq);
+               sq->stats.wake++;
        }
 
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
+
+void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
+{
+       struct mlx5e_tx_wqe_info *wi;
+       struct sk_buff *skb;
+       u16 ci;
+       int i;
+
+       while (sq->cc != sq->pc) {
+               ci = sq->cc & sq->wq.sz_m1;
+               skb = sq->skb[ci];
+               wi = &sq->wqe_info[ci];
+
+               if (!skb) { /* nop */
+                       sq->cc++;
+                       continue;
+               }
+
+               for (i = 0; i < wi->num_dma; i++) {
+                       struct mlx5e_sq_dma *dma =
+                               mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+                       mlx5e_tx_dma_unmap(sq->pdev, dma);
+               }
+
+               dev_kfree_skb_any(skb);
+               sq->cc += wi->num_wqebbs;
+       }
+}
index 64ae2e8..08d8b0c 100644
@@ -51,16 +51,18 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
 
 static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
+       struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq);
        struct mlx5_wq_cyc *wq;
        struct mlx5_cqe64 *cqe;
-       struct mlx5e_sq *sq;
        u16 sqcc;
 
+       if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+               return;
+
        cqe = mlx5e_get_cqe(cq);
        if (likely(!cqe))
                return;
 
-       sq = container_of(cq, struct mlx5e_sq, cq);
        wq = &sq->wq;
 
        /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
@@ -85,7 +87,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
                case MLX5_OPCODE_NOP:
                        break;
                case MLX5_OPCODE_UMR:
-                       mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq);
+                       mlx5e_post_rx_mpwqe(&sq->channel->rq);
                        break;
                default:
                        WARN_ONCE(true,
index 0e30602..aaca090 100644
@@ -86,23 +86,12 @@ struct cre_des_eq {
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-       struct mlx5_destroy_eq_mbox_in in;
-       struct mlx5_destroy_eq_mbox_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
-       in.eqn = eqn;
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (!err)
-               goto ex;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
+       u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
 
-ex:
-       return err;
+       MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+       MLX5_SET(destroy_eq_in, in, eq_number, eqn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
@@ -351,11 +340,13 @@ static void init_eq_buf(struct mlx5_eq *eq)
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       int nent, u64 mask, const char *name, struct mlx5_uar *uar)
 {
+       u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_create_eq_mbox_in *in;
-       struct mlx5_create_eq_mbox_out out;
-       int err;
+       __be64 *pas;
+       void *eqc;
        int inlen;
+       u32 *in;
+       int err;
 
        eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
        eq->cons_index = 0;
@@ -365,35 +356,36 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 
        init_eq_buf(eq);
 
-       inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+       inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+               MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_buf;
        }
-       memset(&out, 0, sizeof(out));
 
-       mlx5_fill_page_array(&eq->buf, in->pas);
+       pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+       mlx5_fill_page_array(&eq->buf, pas);
 
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
-       in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
-       in->ctx.intr = vecidx;
-       in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-       in->events_mask = cpu_to_be64(mask);
+       MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+       MLX5_SET64(create_eq_in, in, event_bitmask, mask);
 
-       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-       if (err)
-               goto err_in;
+       eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
+       MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+       MLX5_SET(eqc, eqc, uar_page, uar->index);
+       MLX5_SET(eqc, eqc, intr, vecidx);
+       MLX5_SET(eqc, eqc, log_page_size,
+                eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
-       if (out.hdr.status) {
-               err = mlx5_cmd_status_to_err(&out.hdr);
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+       if (err)
                goto err_in;
-       }
 
        snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
                 name, pci_name(dev->pdev));
 
-       eq->eqn = out.eq_number;
+       eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = priv->msix_arr[vecidx].vector;
        eq->dev = dev;
        eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
@@ -547,22 +539,12 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      struct mlx5_query_eq_mbox_out *out, int outlen)
+                      u32 *out, int outlen)
 {
-       struct mlx5_query_eq_mbox_in in;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(out, 0, outlen);
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
-       in.eqn = eq->eqn;
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-       if (err)
-               return err;
+       u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
 
-       if (out->hdr.status)
-               err = mlx5_cmd_status_to_err(&out->hdr);
-
-       return err;
+       MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+       MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
index f6d6677..654b76f 100644
@@ -87,13 +87,9 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
                                        u32 events_mask)
 {
-       int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)];
-       int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+       int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]   = {0};
+       int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
        void *nic_vport_ctx;
-       int err;
-
-       memset(out, 0, sizeof(out));
-       memset(in, 0, sizeof(in));
 
        MLX5_SET(modify_nic_vport_context_in, in,
                 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
@@ -116,99 +112,27 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
                MLX5_SET(nic_vport_context, nic_vport_ctx,
                         event_on_promisc_change, 1);
 
-       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-       if (err)
-               goto ex;
-       err = mlx5_cmd_status_to_err_v2(out);
-       if (err)
-               goto ex;
-       return 0;
-ex:
-       return err;
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 /* E-Switch vport context HW commands */
-static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
-                                      u32 *out, int outlen)
-{
-       u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
-
-       memset(in, 0, sizeof(in));
-
-       MLX5_SET(query_nic_vport_context_in, in, opcode,
-                MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
-
-       MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
-       if (vport)
-               MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
-
-       return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
-}
-
-static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
-                                u16 *vlan, u8 *qos)
-{
-       u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)];
-       int err;
-       bool cvlan_strip;
-       bool cvlan_insert;
-
-       memset(out, 0, sizeof(out));
-
-       *vlan = 0;
-       *qos = 0;
-
-       if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
-           !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
-               return -ENOTSUPP;
-
-       err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
-       if (err)
-               goto out;
-
-       cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
-                              esw_vport_context.vport_cvlan_strip);
-
-       cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
-                               esw_vport_context.vport_cvlan_insert);
-
-       if (cvlan_strip || cvlan_insert) {
-               *vlan = MLX5_GET(query_esw_vport_context_out, out,
-                                esw_vport_context.cvlan_id);
-               *qos = MLX5_GET(query_esw_vport_context_out, out,
-                               esw_vport_context.cvlan_pcp);
-       }
-
-       esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
-                 vport, *vlan, *qos);
-out:
-       return err;
-}
-
 static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
                                        void *in, int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)];
-
-       memset(out, 0, sizeof(out));
+       u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
 
+       MLX5_SET(modify_esw_vport_context_in, in, opcode,
+                MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
        MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
        if (vport)
                MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
-
-       MLX5_SET(modify_esw_vport_context_in, in, opcode,
-                MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
-
-       return mlx5_cmd_exec_check_status(dev, in, inlen,
-                                         out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
                                  u16 vlan, u8 qos, bool set)
 {
-       u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
 
        if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
            !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
@@ -216,7 +140,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
 
        esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
                  vport, vlan, qos, set);
-
        if (set) {
                MLX5_SET(modify_esw_vport_context_in, in,
                         esw_vport_context.vport_cvlan_strip, 1);
@@ -241,13 +164,10 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
 static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
                                  u8 *mac, u8 vlan_valid, u16 vlan)
 {
-       u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)];
-       u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)];
+       u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
        u8 *in_mac_addr;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(set_l2_table_entry_in, in, opcode,
                 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
        MLX5_SET(set_l2_table_entry_in, in, table_index, index);
@@ -257,23 +177,18 @@ static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
        in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
        ether_addr_copy(&in_mac_addr[2], mac);
 
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-                                         out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
 {
-       u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)];
-       u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
 
        MLX5_SET(delete_l2_table_entry_in, in, opcode,
                 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
        MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-                                         out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
@@ -340,7 +255,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec) {
-               pr_warn("FDB: Failed to alloc match parameters\n");
+               esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
                return NULL;
        }
        dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -374,8 +289,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
                                   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
                                   0, &dest);
        if (IS_ERR(flow_rule)) {
-               pr_warn(
-                       "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+               esw_warn(esw->dev,
+                        "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
                         dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
                flow_rule = NULL;
        }
@@ -955,7 +870,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
        esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
                  vport_num, promisc_all, promisc_mc);
 
-       if (!vport->trusted || !vport->enabled) {
+       if (!vport->info.trusted || !vport->enabled) {
                promisc_uc = 0;
                promisc_mc = 0;
                promisc_all = 0;
@@ -1291,30 +1206,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                                    struct mlx5_vport *vport)
 {
        struct mlx5_flow_spec *spec;
-       u8 smac[ETH_ALEN];
        int err = 0;
        u8 *smac_v;
 
-       if (vport->spoofchk) {
-               err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac);
-               if (err) {
-                       esw_warn(esw->dev,
-                                "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n",
-                                vport->vport, err);
-                       return err;
-               }
+       if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+               mlx5_core_warn(esw->dev,
+                              "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+                              vport->vport);
+               return -EPERM;
-               if (!is_valid_ether_addr(smac)) {
-                       mlx5_core_warn(esw->dev,
-                                      "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
-                                      vport->vport);
-                       return -EPERM;
-               }
        }
 
        esw_vport_cleanup_ingress_rules(esw, vport);
 
-       if (!vport->vlan && !vport->qos && !vport->spoofchk) {
+       if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
                esw_vport_disable_ingress_acl(esw, vport);
                return 0;
        }
@@ -1323,7 +1228,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 
        esw_debug(esw->dev,
                  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
-                 vport->vport, vport->vlan, vport->qos);
+                 vport->vport, vport->info.vlan, vport->info.qos);
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec) {
@@ -1333,16 +1238,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                goto out;
        }
 
-       if (vport->vlan || vport->qos)
+       if (vport->info.vlan || vport->info.qos)
                MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
 
-       if (vport->spoofchk) {
+       if (vport->info.spoofchk) {
                MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
                MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
                smac_v = MLX5_ADDR_OF(fte_match_param,
                                      spec->match_value,
                                      outer_headers.smac_47_16);
-               ether_addr_copy(smac_v, smac);
+               ether_addr_copy(smac_v, vport->info.mac);
        }
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@ -1352,8 +1257,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                                   0, NULL);
        if (IS_ERR(vport->ingress.allow_rule)) {
                err = PTR_ERR(vport->ingress.allow_rule);
-               pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
-                       vport->vport, err);
+               esw_warn(esw->dev,
+                        "vport[%d] configure ingress allow rule, err(%d)\n",
+                        vport->vport, err);
                vport->ingress.allow_rule = NULL;
                goto out;
        }
@@ -1365,8 +1271,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                                   0, NULL);
        if (IS_ERR(vport->ingress.drop_rule)) {
                err = PTR_ERR(vport->ingress.drop_rule);
-               pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
-                       vport->vport, err);
+               esw_warn(esw->dev,
+                        "vport[%d] configure ingress drop rule, err(%d)\n",
+                        vport->vport, err);
                vport->ingress.drop_rule = NULL;
                goto out;
        }
@@ -1386,7 +1293,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
        esw_vport_cleanup_egress_rules(esw, vport);
 
-       if (!vport->vlan && !vport->qos) {
+       if (!vport->info.vlan && !vport->info.qos) {
                esw_vport_disable_egress_acl(esw, vport);
                return 0;
        }
@@ -1395,7 +1302,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
        esw_debug(esw->dev,
                  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
-                 vport->vport, vport->vlan, vport->qos);
+                 vport->vport, vport->info.vlan, vport->info.qos);
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec) {
@@ -1409,7 +1316,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
        MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
-       MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
+       MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
        vport->egress.allowed_vlan =
@@ -1418,8 +1325,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
                                   0, NULL);
        if (IS_ERR(vport->egress.allowed_vlan)) {
                err = PTR_ERR(vport->egress.allowed_vlan);
-               pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
-                       vport->vport, err);
+               esw_warn(esw->dev,
+                        "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+                        vport->vport, err);
                vport->egress.allowed_vlan = NULL;
                goto out;
        }
@@ -1432,8 +1340,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
                                   0, NULL);
        if (IS_ERR(vport->egress.drop_rule)) {
                err = PTR_ERR(vport->egress.drop_rule);
-               pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
-                       vport->vport, err);
+               esw_warn(esw->dev,
+                        "vport[%d] configure egress drop rule failed, err(%d)\n",
+                        vport->vport, err);
                vport->egress.drop_rule = NULL;
        }
 out:
@@ -1441,6 +1350,41 @@ out:
        return err;
 }
 
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+       ((u8 *)node_guid)[7] = mac[0];
+       ((u8 *)node_guid)[6] = mac[1];
+       ((u8 *)node_guid)[5] = mac[2];
+       ((u8 *)node_guid)[4] = 0xff;
+       ((u8 *)node_guid)[3] = 0xfe;
+       ((u8 *)node_guid)[2] = mac[3];
+       ((u8 *)node_guid)[1] = mac[4];
+       ((u8 *)node_guid)[0] = mac[5];
+}
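
node_guid_gen_from_mac() derives an EUI-64-style GUID by splitting the MAC address in half and splicing ff:fe into the middle. A worked example with a hypothetical MAC, assuming a little-endian host:

	u8  mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	u64 guid;

	node_guid_gen_from_mac(&guid, mac);
	/* guid == 0x001122fffe334455ULL */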
+
+static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+                                struct mlx5_vport *vport)
+{
+       int vport_num = vport->vport;
+
+       if (!vport_num)
+               return;
+
+       mlx5_modify_vport_admin_state(esw->dev,
+                                     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+                                     vport_num,
+                                     vport->info.link_state);
+       mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+       mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+       modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+                              (vport->info.vlan || vport->info.qos));
+
+       /* Only legacy mode needs ACLs */
+       if (esw->mode == SRIOV_LEGACY) {
+               esw_vport_ingress_config(esw, vport);
+               esw_vport_egress_config(esw, vport);
+       }
+}
+
 static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
                             int enable_events)
 {
@@ -1451,22 +1395,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 
        esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
 
-       if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */
-               esw_vport_ingress_config(esw, vport);
-               esw_vport_egress_config(esw, vport);
-       }
-
-       mlx5_modify_vport_admin_state(esw->dev,
-                                     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                                     vport_num,
-                                     MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+       /* Restore old vport configuration */
+       esw_apply_vport_conf(esw, vport);
 
        /* Sync with current vport context */
        vport->enabled_events = enable_events;
        vport->enabled = true;
 
        /* only PF is trusted by default */
-       vport->trusted = (vport_num) ? false : true;
+       if (!vport_num)
+               vport->info.trusted = true;
+
        esw_vport_change_handle_locked(vport);
 
        esw->enabled_vports++;
@@ -1486,11 +1425,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
        vport->enabled = false;
 
        synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
-
-       mlx5_modify_vport_admin_state(esw->dev,
-                                     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                                     vport_num,
-                                     MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
        /* Wait for current already scheduled events to complete */
        flush_workqueue(esw->work_queue);
        /* Disable events from this vport */
@@ -1502,7 +1436,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
         */
        esw_vport_change_handle_locked(vport);
        vport->enabled_events = 0;
-       if (vport_num) {
+
+       if (vport_num && esw->mode == SRIOV_LEGACY) {
+               mlx5_modify_vport_admin_state(esw->dev,
+                                             MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+                                             vport_num,
+                                             MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
                esw_vport_disable_egress_acl(esw, vport);
                esw_vport_disable_ingress_acl(esw, vport);
        }
@@ -1588,6 +1527,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
        esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
 }
 
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
+{
+       if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+           MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+               return;
+
+       esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+       /* VF Vports will be enabled when SRIOV is enabled */
+}
+
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
+{
+       if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+           MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+               return;
+
+       esw_disable_vport(esw, 0);
+}
+
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
        int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
@@ -1655,6 +1613,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                struct mlx5_vport *vport = &esw->vports[vport_num];
 
                vport->vport = vport_num;
+               vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
                vport->dev = dev;
                INIT_WORK(&vport->vport_change_handler,
                          esw_vport_change_handler);
@@ -1665,8 +1624,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        esw->mode = SRIOV_NONE;
 
        dev->priv.eswitch = esw;
-       esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
-       /* VF Vports will be enabled when SRIOV is enabled */
        return 0;
 abort:
        if (esw->work_queue)
@@ -1685,7 +1642,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
                return;
 
        esw_info(esw->dev, "cleanup\n");
-       esw_disable_vport(esw, 0);
 
        esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
@@ -1718,18 +1674,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
        (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
-static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
-{
-       ((u8 *)node_guid)[7] = mac[0];
-       ((u8 *)node_guid)[6] = mac[1];
-       ((u8 *)node_guid)[5] = mac[2];
-       ((u8 *)node_guid)[4] = 0xff;
-       ((u8 *)node_guid)[3] = 0xfe;
-       ((u8 *)node_guid)[2] = mac[3];
-       ((u8 *)node_guid)[1] = mac[4];
-       ((u8 *)node_guid)[0] = mac[5];
-}
-
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
                               int vport, u8 mac[ETH_ALEN])
 {
@@ -1742,13 +1686,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
        if (!LEGAL_VPORT(esw, vport))
                return -EINVAL;
 
+       mutex_lock(&esw->state_lock);
        evport = &esw->vports[vport];
 
-       if (evport->spoofchk && !is_valid_ether_addr(mac)) {
+       if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
                mlx5_core_warn(esw->dev,
                               "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
                               vport);
-               return -EPERM;
+               err = -EPERM;
+               goto unlock;
        }
 
        err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
@@ -1756,7 +1702,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
                mlx5_core_warn(esw->dev,
                               "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
                               vport, err);
-               return err;
+               goto unlock;
        }
 
        node_guid_gen_from_mac(&node_guid, mac);
@@ -1766,9 +1712,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
                               "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
                               vport, err);
 
-       mutex_lock(&esw->state_lock);
-       if (evport->enabled)
+       ether_addr_copy(evport->info.mac, mac);
+       evport->info.node_guid = node_guid;
+       if (evport->enabled && esw->mode == SRIOV_LEGACY)
                err = esw_vport_ingress_config(esw, evport);
+
+unlock:
        mutex_unlock(&esw->state_lock);
        return err;
 }
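
With this hunk the setter holds esw->state_lock across the whole operation: it programs the firmware first and only then records the accepted MAC and node GUID in evport->info, so a failed command leaves the cache untouched. A minimal standalone sketch of that program-then-cache pattern, using pthreads in place of the kernel mutex (all names illustrative):

    #include <pthread.h>
    #include <string.h>

    struct port_cfg {
            pthread_mutex_t lock;
            unsigned char mac[6];
    };

    static int port_set_mac(struct port_cfg *p, const unsigned char mac[6],
                            int (*hw_apply)(const unsigned char mac[6]))
    {
            int err;

            pthread_mutex_lock(&p->lock);
            err = hw_apply(mac);            /* program hardware first */
            if (!err)                       /* cache only on success  */
                    memcpy(p->mac, mac, sizeof(p->mac));
            pthread_mutex_unlock(&p->lock);
            return err;
    }

Reads, as in mlx5_eswitch_get_vport_config() further down, can then be served entirely from the cached copy under the same lock, with no firmware round-trip.
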
@@ -1776,22 +1725,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
                                 int vport, int link_state)
 {
+       struct mlx5_vport *evport;
+       int err = 0;
+
        if (!ESW_ALLOWED(esw))
                return -EPERM;
        if (!LEGAL_VPORT(esw, vport))
                return -EINVAL;
 
-       return mlx5_modify_vport_admin_state(esw->dev,
-                                            MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                                            vport, link_state);
+       mutex_lock(&esw->state_lock);
+       evport = &esw->vports[vport];
+
+       err = mlx5_modify_vport_admin_state(esw->dev,
+                                           MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+                                           vport, link_state);
+       if (err) {
+               mlx5_core_warn(esw->dev,
+                              "Failed to set vport %d link state, err = %d",
+                              vport, err);
+               goto unlock;
+       }
+
+       evport->info.link_state = link_state;
+
+unlock:
+       mutex_unlock(&esw->state_lock);
+       return err;
 }
 
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
                                  int vport, struct ifla_vf_info *ivi)
 {
        struct mlx5_vport *evport;
-       u16 vlan;
-       u8 qos;
 
        if (!ESW_ALLOWED(esw))
                return -EPERM;
@@ -1803,14 +1768,14 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
        memset(ivi, 0, sizeof(*ivi));
        ivi->vf = vport - 1;
 
-       mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
-       ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
-                                                     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-                                                     vport);
-       query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
-       ivi->vlan = vlan;
-       ivi->qos = qos;
-       ivi->spoofchk = evport->spoofchk;
+       mutex_lock(&esw->state_lock);
+       ether_addr_copy(ivi->mac, evport->info.mac);
+       ivi->linkstate = evport->info.link_state;
+       ivi->vlan = evport->info.vlan;
+       ivi->qos = evport->info.qos;
+       ivi->spoofchk = evport->info.spoofchk;
+       ivi->trusted = evport->info.trusted;
+       mutex_unlock(&esw->state_lock);
 
        return 0;
 }
@@ -1830,23 +1795,23 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
        if (vlan || qos)
                set = 1;
 
+       mutex_lock(&esw->state_lock);
        evport = &esw->vports[vport];
 
        err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
        if (err)
-               return err;
+               goto unlock;
 
-       mutex_lock(&esw->state_lock);
-       evport->vlan = vlan;
-       evport->qos = qos;
-       if (evport->enabled) {
+       evport->info.vlan = vlan;
+       evport->info.qos = qos;
+       if (evport->enabled && esw->mode == SRIOV_LEGACY) {
                err = esw_vport_ingress_config(esw, evport);
                if (err)
-                       goto out;
+                       goto unlock;
                err = esw_vport_egress_config(esw, evport);
        }
 
-out:
+unlock:
        mutex_unlock(&esw->state_lock);
        return err;
 }
@@ -1863,15 +1828,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
        if (!LEGAL_VPORT(esw, vport))
                return -EINVAL;
 
-       evport = &esw->vports[vport];
-
        mutex_lock(&esw->state_lock);
-       pschk = evport->spoofchk;
-       evport->spoofchk = spoofchk;
-       if (evport->enabled)
+       evport = &esw->vports[vport];
+       pschk = evport->info.spoofchk;
+       evport->info.spoofchk = spoofchk;
+       if (evport->enabled && esw->mode == SRIOV_LEGACY)
                err = esw_vport_ingress_config(esw, evport);
        if (err)
-               evport->spoofchk = pschk;
+               evport->info.spoofchk = pschk;
        mutex_unlock(&esw->state_lock);
 
        return err;
@@ -1887,10 +1851,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
        if (!LEGAL_VPORT(esw, vport))
                return -EINVAL;
 
-       evport = &esw->vports[vport];
-
        mutex_lock(&esw->state_lock);
-       evport->trusted = setting;
+       evport = &esw->vports[vport];
+       evport->info.trusted = setting;
        if (evport->enabled)
                esw_vport_change_handle_locked(evport);
        mutex_unlock(&esw->state_lock);
@@ -1903,7 +1866,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 struct ifla_vf_stats *vf_stats)
 {
        int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
-       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
        int err = 0;
        u32 *out;
 
@@ -1916,8 +1879,6 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
        if (!out)
                return -ENOMEM;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(query_vport_counter_in, in, opcode,
                 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
        MLX5_SET(query_vport_counter_in, in, op_mod, 0);
index c0b0560..6855783 100644
@@ -109,6 +109,16 @@ struct vport_egress {
        struct mlx5_flow_rule  *drop_rule;
 };
 
+struct mlx5_vport_info {
+       u8                      mac[ETH_ALEN];
+       u16                     vlan;
+       u8                      qos;
+       u64                     node_guid;
+       int                     link_state;
+       bool                    spoofchk;
+       bool                    trusted;
+};
+
 struct mlx5_vport {
        struct mlx5_core_dev    *dev;
        int                     vport;
@@ -121,10 +131,8 @@ struct mlx5_vport {
        struct vport_ingress    ingress;
        struct vport_egress     egress;
 
-       u16                     vlan;
-       u8                      qos;
-       bool                    spoofchk;
-       bool                    trusted;
+       struct mlx5_vport_info  info;
+
        bool                    enabled;
        u16                     enabled_events;
 };
@@ -174,6 +182,7 @@ struct mlx5_eswitch_rep {
        void                  *priv_data;
        struct list_head       vport_sqs_list;
        bool                   valid;
+       u8                     hw_id[ETH_ALEN];
 };
 
 struct mlx5_esw_offload {
@@ -203,6 +212,8 @@ struct mlx5_eswitch {
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
 void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
index a357e8e..3dc83a9 100644
@@ -113,7 +113,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = vport;
 
-       flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+       flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
                                       0, &dest);
        if (IS_ERR(flow_rule))
@@ -535,7 +535,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
        esw_destroy_offloads_fdb_table(esw);
 }
 
-static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
 {
        switch (mode) {
        case DEVLINK_ESWITCH_MODE_LEGACY:
@@ -551,6 +551,22 @@ static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
        return 0;
 }
 
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+       switch (mlx5_mode) {
+       case SRIOV_LEGACY:
+               *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+               break;
+       case SRIOV_OFFLOADS:
+               *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
        struct mlx5_core_dev *dev;
@@ -566,7 +582,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
        if (cur_mlx5_mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
-       if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode))
+       if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
        if (cur_mlx5_mode == mlx5_mode)
@@ -592,9 +608,7 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
        if (dev->priv.eswitch->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
 
-       *mode = dev->priv.eswitch->mode;
-
-       return 0;
+       return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
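
esw_mode_from_devlink() and esw_mode_to_devlink() now translate in both directions, so the devlink get path reports DEVLINK_ESWITCH_MODE_* values instead of leaking the driver-internal SRIOV_* enum as it did before. A standalone round-trip check of the idea, with stand-in constants (the real values live in uapi and driver headers):

    #include <assert.h>

    enum { SRIOV_LEGACY = 1, SRIOV_OFFLOADS = 2 };        /* internal */
    enum { DL_MODE_LEGACY = 0, DL_MODE_SWITCHDEV = 1 };   /* uapi     */

    static int to_devlink(int mlx5_mode, int *mode)
    {
            switch (mlx5_mode) {
            case SRIOV_LEGACY:   *mode = DL_MODE_LEGACY;    return 0;
            case SRIOV_OFFLOADS: *mode = DL_MODE_SWITCHDEV; return 0;
            default:             return -1;
            }
    }

    static int from_devlink(int mode, int *mlx5_mode)
    {
            switch (mode) {
            case DL_MODE_LEGACY:    *mlx5_mode = SRIOV_LEGACY;   return 0;
            case DL_MODE_SWITCHDEV: *mlx5_mode = SRIOV_OFFLOADS; return 0;
            default:                return -1;
            }
    }

    int main(void)
    {
            int dl, internal;

            /* The two tables must stay inverse of each other. */
            assert(!to_devlink(SRIOV_OFFLOADS, &dl));
            assert(!from_devlink(dl, &internal) && internal == SRIOV_OFFLOADS);
            return 0;
    }

Keeping both mappings adjacent in the source makes it easy to confirm they remain inverse when a new mode is added.
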
index 9134010..7a0415e 100644
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
                            struct mlx5_flow_table *ft)
 {
-       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
-       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
 
        MLX5_SET(set_flow_table_root_in, in, opcode,
                 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
@@ -55,30 +53,23 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
                MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
        }
 
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
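
The recurring cleanup in these hunks swaps memset() pairs for "= {0}" initializers. C guarantees that an array with a partial initializer list has all remaining elements zero-initialized, so the single-element {0} zeroes the whole buffer at its definition; the only behavioral difference is that, unlike memset(), it cannot be repeated later to re-clear the array. A tiny standalone check:

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
            unsigned int in[16] = {0};      /* entire array is zeroed */
            size_t i;

            for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
                    assert(in[i] == 0);
            return 0;
    }
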
 
 int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                               u16 vport,
+                              enum fs_flow_table_op_mod op_mod,
                               enum fs_flow_table_type type, unsigned int level,
                               unsigned int log_size, struct mlx5_flow_table
                               *next_ft, unsigned int *table_id)
 {
-       u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
-       u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
+       u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
        int err;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(create_flow_table_in, in, opcode,
                 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
-       if (next_ft) {
-               MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
-               MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
-       }
        MLX5_SET(create_flow_table_in, in, table_type, type);
        MLX5_SET(create_flow_table_in, in, level, level);
        MLX5_SET(create_flow_table_in, in, log_size, log_size);
@@ -87,10 +78,23 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                MLX5_SET(create_flow_table_in, in, other_vport, 1);
        }
 
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                        sizeof(out));
+       switch (op_mod) {
+       case FS_FT_OP_MOD_NORMAL:
+               if (next_ft) {
+                       MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
+                       MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+               }
+               break;
+
+       case FS_FT_OP_MOD_LAG_DEMUX:
+               MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+               if (next_ft)
+                       MLX5_SET(create_flow_table_in, in, lag_master_next_table_id,
+                                next_ft->id);
+               break;
+       }
 
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (!err)
                *table_id = MLX5_GET(create_flow_table_out, out,
                                     table_id);
@@ -100,11 +104,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
                                struct mlx5_flow_table *ft)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
 
        MLX5_SET(destroy_flow_table_in, in, opcode,
                 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
@@ -115,39 +116,49 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
                MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
        }
 
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
                               struct mlx5_flow_table *ft,
                               struct mlx5_flow_table *next_ft)
 {
-       u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
-       u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
 
        MLX5_SET(modify_flow_table_in, in, opcode,
                 MLX5_CMD_OP_MODIFY_FLOW_TABLE);
        MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
        MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
-       if (ft->vport) {
-               MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
-               MLX5_SET(modify_flow_table_in, in, other_vport, 1);
-       }
-       MLX5_SET(modify_flow_table_in, in, modify_field_select,
-                MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
-       if (next_ft) {
-               MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
-               MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
+
+       if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+               MLX5_SET(modify_flow_table_in, in, modify_field_select,
+                        MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+               if (next_ft) {
+                       MLX5_SET(modify_flow_table_in, in,
+                                lag_master_next_table_id, next_ft->id);
+               } else {
+                       MLX5_SET(modify_flow_table_in, in,
+                                lag_master_next_table_id, 0);
+               }
        } else {
-               MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+               if (ft->vport) {
+                       MLX5_SET(modify_flow_table_in, in, vport_number,
+                                ft->vport);
+                       MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+               }
+               MLX5_SET(modify_flow_table_in, in, modify_field_select,
+                        MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+               if (next_ft) {
+                       MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
+                       MLX5_SET(modify_flow_table_in, in, table_miss_id,
+                                next_ft->id);
+               } else {
+                       MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+               }
        }
 
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
@@ -155,12 +166,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
                               u32 *in,
                               unsigned int *group_id)
 {
+       u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
        int err;
 
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(create_flow_group_in, in, opcode,
                 MLX5_CMD_OP_CREATE_FLOW_GROUP);
        MLX5_SET(create_flow_group_in, in, table_type, ft->type);
@@ -170,13 +179,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
                MLX5_SET(create_flow_group_in, in, other_vport, 1);
        }
 
-       err = mlx5_cmd_exec_check_status(dev, in,
-                                        inlen, out,
-                                        sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *group_id = MLX5_GET(create_flow_group_out, out,
                                     group_id);
-
        return err;
 }
 
@@ -184,11 +190,8 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
                                struct mlx5_flow_table *ft,
                                unsigned int group_id)
 {
-       u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
-       u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
 
        MLX5_SET(destroy_flow_group_in, in, opcode,
                 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
@@ -200,8 +203,7 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev,
                MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
        }
 
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -212,7 +214,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 {
        unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
                fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
-       u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+       u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
        struct mlx5_flow_rule *dst;
        void *in_flow_context;
        void *in_match_value;
@@ -290,11 +292,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
                         list_size);
        }
 
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
-                                        sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        kvfree(in);
-
        return err;
 }
 
@@ -303,7 +302,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev,
                        unsigned group_id,
                        struct fs_fte *fte)
 {
-       return  mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+       return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
 }
 
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
@@ -327,12 +326,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
                        struct mlx5_flow_table *ft,
                        unsigned int index)
 {
-       u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
-       u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
-       int err;
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
 
        MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
        MLX5_SET(delete_fte_in, in, table_type, ft->type);
@@ -343,74 +338,55 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
                MLX5_SET(delete_fte_in, in, other_vport, 1);
        }
 
-       err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
-
-       return err;
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
 {
-       u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
-       u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+       u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
        int err;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(alloc_flow_counter_in, in, opcode,
                 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
 
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                        sizeof(out));
-       if (err)
-               return err;
-
-       *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
-
-       return 0;
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+       return err;
 }
 
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
 {
-       u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
-       u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
 
        MLX5_SET(dealloc_flow_counter_in, in, opcode,
                 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
        MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
                      u64 *packets, u64 *bytes)
 {
        u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-               MLX5_ST_SZ_BYTES(traffic_counter)];
-       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+               MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
+       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
        void *stats;
        int err = 0;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(query_flow_counter_in, in, opcode,
                 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
        MLX5_SET(query_flow_counter_in, in, op_mod, 0);
        MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
-
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;
 
        stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
        *packets = MLX5_GET64(traffic_counter, stats, packets);
        *bytes = MLX5_GET64(traffic_counter, stats, octets);
-
        return 0;
 }
 
@@ -448,18 +424,14 @@ void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
 int
 mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
 {
-       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 
        MLX5_SET(query_flow_counter_in, in, opcode,
                 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
        MLX5_SET(query_flow_counter_in, in, op_mod, 0);
        MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
        MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-                                         b->out, b->outlen);
+       return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
 }
 
 void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
@@ -480,3 +452,51 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
        *packets = MLX5_GET64(traffic_counter, stats, packets);
        *bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
+
+#define MAX_ENCAP_SIZE (128)
+
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+                        int header_type,
+                        size_t size,
+                        void *encap_header,
+                        u32 *encap_id)
+{
+       u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+       u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+             (MAX_ENCAP_SIZE / sizeof(u32))];
+       void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+                                            encap_header);
+       void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+                                   encap_header);
+       int inlen = header - (void *)in + size;
+       int err;
+
+       if (size > MAX_ENCAP_SIZE)
+               return -EINVAL;
+
+       memset(in, 0, inlen);
+       MLX5_SET(alloc_encap_header_in, in, opcode,
+                MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+       MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+       MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+       memcpy(header, encap_header, size);
+
+       memset(out, 0, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+       *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+       return err;
+}
+
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+{
+       u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+       u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+       memset(in, 0, sizeof(in));
+       MLX5_SET(dealloc_encap_header_in, in, opcode,
+                MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+       MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
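
A hypothetical caller pairing the two new helpers might look as follows; this is a usage sketch in driver context, not code from the patch, and error handling is trimmed:

    static int encap_roundtrip(struct mlx5_core_dev *dev,
                               void *hdr, size_t len, int hdr_type)
    {
            u32 encap_id;
            int err;

            /* Fails with -EINVAL when len exceeds MAX_ENCAP_SIZE (128). */
            err = mlx5_cmd_alloc_encap(dev, hdr_type, len, hdr, &encap_id);
            if (err)
                    return err;

            /* ... reference encap_id from flow table entries ... */

            mlx5_cmd_dealloc_encap(dev, encap_id);
            return 0;
    }
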
index 158844c..c5bc468 100644
@@ -35,6 +35,7 @@
 
 int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                               u16 vport,
+                              enum fs_flow_table_op_mod op_mod,
                               enum fs_flow_table_type type, unsigned int level,
                               unsigned int log_size, struct mlx5_flow_table
                               *next_ft, unsigned int *table_id);
@@ -88,4 +89,11 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
                          struct mlx5_cmd_fc_bulk *b, u16 id,
                          u64 *packets, u64 *bytes);
 
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+                        int header_type,
+                        size_t size,
+                        void *encap_header,
+                        u32 *encap_id);
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
+
 #endif
index 75bb8c8..5da2cc8 100644
@@ -80,7 +80,7 @@
                           LEFTOVERS_NUM_PRIOS)
 
 #define ETHTOOL_PRIO_NUM_LEVELS 1
-#define ETHTOOL_NUM_PRIOS 10
+#define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
 /* Vlan, mac, ttc, aRFS */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 4
 #define OFFLOADS_NUM_PRIOS 1
 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
 
+#define LAG_PRIO_NUM_LEVELS 1
+#define LAG_NUM_PRIOS 1
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+
 struct node_caps {
        size_t  arr_sz;
        long    *caps;
@@ -111,12 +115,16 @@ static struct init_tree_node {
        int num_levels;
 } root_fs = {
        .type = FS_TYPE_NAMESPACE,
-       .ar_size = 6,
+       .ar_size = 7,
        .children = (struct init_tree_node[]) {
                ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
                         FS_CHAINING_CAPS,
                         ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
                                                  BY_PASS_PRIO_NUM_LEVELS))),
+               ADD_PRIO(0, LAG_MIN_LEVEL, 0,
+                        FS_CHAINING_CAPS,
+                        ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+                                                 LAG_PRIO_NUM_LEVELS))),
                ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
                         ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
                ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
@@ -345,7 +353,7 @@ static void del_flow_table(struct fs_node *node)
 
        err = mlx5_cmd_destroy_flow_table(dev, ft);
        if (err)
-               pr_warn("flow steering can't destroy ft\n");
+               mlx5_core_warn(dev, "flow steering can't destroy ft\n");
        fs_get_obj(prio, ft->node.parent);
        prio->num_ft--;
 }
@@ -364,7 +372,7 @@ static void del_rule(struct fs_node *node)
 
        match_value = mlx5_vzalloc(match_len);
        if (!match_value) {
-               pr_warn("failed to allocate inbox\n");
+               mlx5_core_warn(dev, "failed to allocate inbox\n");
                return;
        }
 
@@ -387,8 +395,9 @@ static void del_rule(struct fs_node *node)
                                          modify_mask,
                                          fte);
                if (err)
-                       pr_warn("%s can't del rule fg id=%d fte_index=%d\n",
-                               __func__, fg->id, fte->index);
+                       mlx5_core_warn(dev,
+                                      "%s can't del rule fg id=%d fte_index=%d\n",
+                                      __func__, fg->id, fte->index);
        }
        kvfree(match_value);
 }
@@ -409,8 +418,9 @@ static void del_fte(struct fs_node *node)
        err = mlx5_cmd_delete_fte(dev, ft,
                                  fte->index);
        if (err)
-               pr_warn("flow steering can't delete fte in index %d of flow group id %d\n",
-                       fte->index, fg->id);
+               mlx5_core_warn(dev,
+                              "flow steering can't delete fte in index %d of flow group id %d\n",
+                              fte->index, fg->id);
 
        fte->status = 0;
        fg->num_ftes--;
@@ -427,8 +437,8 @@ static void del_flow_group(struct fs_node *node)
        dev = get_dev(&ft->node);
 
        if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
-               pr_warn("flow steering can't destroy fg %d of ft %d\n",
-                       fg->id, ft->id);
+               mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+                              fg->id, ft->id);
 }
 
 static struct fs_fte *alloc_fte(u8 action,
@@ -475,7 +485,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
 }
 
 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
-                                               enum fs_flow_table_type table_type)
+                                               enum fs_flow_table_type table_type,
+                                               enum fs_flow_table_op_mod op_mod)
 {
        struct mlx5_flow_table *ft;
 
@@ -485,6 +496,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
 
        ft->level = level;
        ft->node.type = FS_TYPE_FLOW_TABLE;
+       ft->op_mod = op_mod;
        ft->type = table_type;
        ft->vport = vport;
        ft->max_fte = max_fte;
@@ -722,6 +734,7 @@ static void list_add_flow_table(struct mlx5_flow_table *ft,
 }
 
 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+                                                       enum fs_flow_table_op_mod op_mod,
                                                        u16 vport, int prio,
                                                        int max_fte, u32 level)
 {
@@ -754,18 +767,19 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
        level += fs_prio->start_level;
        ft = alloc_flow_table(level,
                              vport,
-                             roundup_pow_of_two(max_fte),
-                             root->table_type);
+                             max_fte ? roundup_pow_of_two(max_fte) : 0,
+                             root->table_type,
+                             op_mod);
        if (!ft) {
                err = -ENOMEM;
                goto unlock_root;
        }
 
        tree_init_node(&ft->node, 1, del_flow_table);
-       log_table_sz = ilog2(ft->max_fte);
+       log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
        next_ft = find_next_chained_ft(fs_prio);
-       err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level,
-                                        log_table_sz, next_ft, &ft->id);
+       err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
+                                        ft->level, log_table_sz, next_ft, &ft->id);
        if (err)
                goto free_ft;
 
@@ -792,15 +806,26 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
                                               int prio, int max_fte,
                                               u32 level)
 {
-       return __mlx5_create_flow_table(ns, 0, prio, max_fte, level);
+       return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio,
+                                       max_fte, level);
 }
 
 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
                                                     int prio, int max_fte,
                                                     u32 level, u16 vport)
 {
-       return __mlx5_create_flow_table(ns, vport, prio, max_fte, level);
+       return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio,
+                                       max_fte, level);
+}
+
+struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
+                                              struct mlx5_flow_namespace *ns,
+                                              int prio, u32 level)
+{
+       return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0,
+                                       level);
 }
+EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
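
A hypothetical consumer of the new exported entry point, assuming a device whose flow steering exposes the LAG namespace (added to mlx5_get_flow_namespace() later in this patch); __mlx5_create_flow_table() presumably reports failure via ERR_PTR as the other table-creation helpers do, so a real caller would IS_ERR()-check the result:

    static struct mlx5_flow_table *create_lag_demux(struct mlx5_core_dev *dev)
    {
            struct mlx5_flow_namespace *ns;

            ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
            if (!ns)
                    return NULL;            /* namespace not supported */

            /* prio 0, level 0; a demux table carries no FTEs
             * (max_fte == 0), hence no max_fte argument in the API. */
            return mlx5_create_lag_demux_flow_table(ns, 0, 0);
    }
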
 
 struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
                                                            int prio,
@@ -1379,6 +1404,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 
        switch (type) {
        case MLX5_FLOW_NAMESPACE_BYPASS:
+       case MLX5_FLOW_NAMESPACE_LAG:
        case MLX5_FLOW_NAMESPACE_OFFLOADS:
        case MLX5_FLOW_NAMESPACE_ETHTOOL:
        case MLX5_FLOW_NAMESPACE_KERNEL:
@@ -1401,6 +1427,16 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
                        return &steering->esw_ingress_root_ns->ns;
                else
                        return NULL;
+       case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+               if (steering->sniffer_rx_root_ns)
+                       return &steering->sniffer_rx_root_ns->ns;
+               else
+                       return NULL;
+       case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+               if (steering->sniffer_tx_root_ns)
+                       return &steering->sniffer_tx_root_ns->ns;
+               else
+                       return NULL;
        default:
                return NULL;
        }
@@ -1700,10 +1736,46 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
        cleanup_root_ns(steering->esw_egress_root_ns);
        cleanup_root_ns(steering->esw_ingress_root_ns);
        cleanup_root_ns(steering->fdb_root_ns);
+       cleanup_root_ns(steering->sniffer_rx_root_ns);
+       cleanup_root_ns(steering->sniffer_tx_root_ns);
        mlx5_cleanup_fc_stats(dev);
        kfree(steering);
 }
 
+static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+       struct fs_prio *prio;
+
+       steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
+       if (!steering->sniffer_tx_root_ns)
+               return -ENOMEM;
+
+       /* Create single prio */
+       prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
+       if (IS_ERR(prio)) {
+               cleanup_root_ns(steering->sniffer_tx_root_ns);
+               return PTR_ERR(prio);
+       }
+       return 0;
+}
+
+static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+       struct fs_prio *prio;
+
+       steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
+       if (!steering->sniffer_rx_root_ns)
+               return -ENOMEM;
+
+       /* Create single prio */
+       prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
+       if (IS_ERR(prio)) {
+               cleanup_root_ns(steering->sniffer_rx_root_ns);
+               return PTR_ERR(prio);
+       }
+       return 0;
+}
+
 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
        struct fs_prio *prio;
@@ -1800,6 +1872,18 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
                }
        }
 
+       if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
+               err = init_sniffer_rx_root_ns(steering);
+               if (err)
+                       goto err;
+       }
+
+       if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
+               err = init_sniffer_tx_root_ns(steering);
+               if (err)
+                       goto err;
+       }
+
        return 0;
 err:
        mlx5_cleanup_fs(dev);
index 9cffb6a..71ff03b 100644
@@ -49,6 +49,13 @@ enum fs_flow_table_type {
        FS_FT_ESW_EGRESS_ACL  = 0x2,
        FS_FT_ESW_INGRESS_ACL = 0x3,
        FS_FT_FDB             = 0X4,
+       FS_FT_SNIFFER_RX        = 0X5,
+       FS_FT_SNIFFER_TX        = 0X6,
+};
+
+enum fs_flow_table_op_mod {
+       FS_FT_OP_MOD_NORMAL,
+       FS_FT_OP_MOD_LAG_DEMUX,
 };
 
 enum fs_fte_status {
@@ -61,6 +68,8 @@ struct mlx5_flow_steering {
        struct mlx5_flow_root_namespace *fdb_root_ns;
        struct mlx5_flow_root_namespace *esw_egress_root_ns;
        struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+       struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
+       struct mlx5_flow_root_namespace *sniffer_rx_root_ns;
 };
 
 struct fs_node {
@@ -93,6 +102,7 @@ struct mlx5_flow_table {
        unsigned int                    max_fte;
        unsigned int                    level;
        enum fs_flow_table_type         type;
+       enum fs_flow_table_op_mod       op_mod;
        struct {
                bool                    active;
                unsigned int            required_groups;
index c2877e9..3a9195b 100644
@@ -126,12 +126,21 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
        for (node = &first->node; node; node = rb_next(node)) {
                struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
                struct mlx5_fc_cache *c = &counter->cache;
+               u64 packets;
+               u64 bytes;
 
                if (counter->id > last_id)
                        break;
 
                mlx5_cmd_fc_bulk_get(dev, b,
-                                    counter->id, &c->packets, &c->bytes);
+                                    counter->id, &packets, &bytes);
+
+               if (c->packets == packets)
+                       continue;
+
+               c->packets = packets;
+               c->bytes = bytes;
+               c->lastuse = jiffies;
        }
 
 out:
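
The hunk above refreshes the cached counters only when the packet count actually moved, stamping c->lastuse with jiffies at that moment; a consumer can then age out idle flows by comparing lastuse against the current time. A standalone sketch of the scheme (illustrative types):

    #include <stdbool.h>
    #include <stdint.h>

    struct fc_cache {
            uint64_t packets;
            uint64_t bytes;
            uint64_t lastuse;       /* time of last observed change */
    };

    /* Fold a fresh hardware reading into the cache; only a change in
     * traffic refreshes lastuse, so idle flows keep an old stamp. */
    static void fc_cache_update(struct fc_cache *c, uint64_t packets,
                                uint64_t bytes, uint64_t now)
    {
            if (c->packets == packets)
                    return;
            c->packets = packets;
            c->bytes   = bytes;
            c->lastuse = now;
    }

    static bool fc_is_idle(const struct fc_cache *c, uint64_t now,
                           uint64_t timeout)
    {
            return now - c->lastuse >= timeout;
    }
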
index 77fc1aa..5718aad 100644
 static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
                                  int outlen)
 {
-       u32 in[MLX5_ST_SZ_DW(query_adapter_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
 
        MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_query_board_id(struct mlx5_core_dev *dev)
@@ -162,38 +159,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
-       struct mlx5_cmd_init_hca_mbox_in in;
-       struct mlx5_cmd_init_hca_mbox_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
+       u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(init_hca_in)]   = {0};
 
-       return err;
+       MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 {
-       struct mlx5_cmd_teardown_hca_mbox_in in;
-       struct mlx5_cmd_teardown_hca_mbox_out out;
-       int err;
+       u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(teardown_hca_in)]   = {0};
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
new file mode 100644
index 0000000..5595724
--- /dev/null
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+enum {
+       MLX5_LAG_FLAG_BONDED = 1 << 0,
+};
+
+struct lag_func {
+       struct mlx5_core_dev *dev;
+       struct net_device    *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+       enum   netdev_lag_tx_type           tx_type;
+       struct netdev_lag_lower_state_info  netdev_state[MLX5_MAX_PORTS];
+       bool is_bonded;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both of the card's physical functions (PFs).
+ */
+struct mlx5_lag {
+       u8                        flags;
+       u8                        v2p_map[MLX5_MAX_PORTS];
+       struct lag_func           pf[MLX5_MAX_PORTS];
+       struct lag_tracker        tracker;
+       struct delayed_work       bond_work;
+       struct notifier_block     nb;
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_MUTEX(lag_mutex);
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+                              u8 remap_port2)
+{
+       u32   in[MLX5_ST_SZ_DW(create_lag_in)]   = {0};
+       u32   out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+       void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+
+       MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+
+       MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+       MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+                              u8 remap_port2)
+{
+       u32   in[MLX5_ST_SZ_DW(modify_lag_in)]   = {0};
+       u32   out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+       void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+       MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+       MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+       MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+       MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
+{
+       u32  in[MLX5_ST_SZ_DW(destroy_lag_in)]  = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
+
+       MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+       u32  in[MLX5_ST_SZ_DW(create_vport_lag_in)]  = {0};
+       u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+
+       MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+       u32  in[MLX5_ST_SZ_DW(destroy_vport_lag_in)]  = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+
+       MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+       return dev->priv.lag;
+}
+
+static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+                                      struct net_device *ndev)
+{
+       int i;
+
+       for (i = 0; i < MLX5_MAX_PORTS; i++)
+               if (ldev->pf[i].netdev == ndev)
+                       return i;
+
+       return -1;
+}
+
+static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+{
+       return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+}
+
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+                                          u8 *port1, u8 *port2)
+{
+       if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+               if (tracker->netdev_state[0].tx_enabled) {
+                       *port1 = 1;
+                       *port2 = 1;
+               } else {
+                       *port1 = 2;
+                       *port2 = 2;
+               }
+       } else {
+               *port1 = 1;
+               *port2 = 2;
+               if (!tracker->netdev_state[0].link_up)
+                       *port1 = 2;
+               else if (!tracker->netdev_state[1].link_up)
+                       *port2 = 1;
+       }
+}
+
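
mlx5_infer_tx_affinity_mapping() above chooses, for each of the two virtual ports, which physical port should carry its transmit traffic: active-backup pins both to whichever port currently has tx_enabled, while hash mode keeps the identity mapping unless one link is down, in which case both map to the surviving port. A standalone restatement with a worked case (illustrative types, mirroring the function above):

    #include <stdbool.h>
    #include <stdio.h>

    struct port_state { bool tx_enabled; bool link_up; };

    static void infer_map(bool active_backup, const struct port_state s[2],
                          int *p1, int *p2)
    {
            if (active_backup) {
                    *p1 = *p2 = s[0].tx_enabled ? 1 : 2;
            } else {
                    *p1 = 1;
                    *p2 = 2;
                    if (!s[0].link_up)
                            *p1 = 2;        /* steer around the dead link */
                    else if (!s[1].link_up)
                            *p2 = 1;
            }
    }

    int main(void)
    {
            struct port_state s[2] = { { .link_up = false },
                                       { .link_up = true  } };
            int p1, p2;

            infer_map(false, s, &p1, &p2);  /* hash mode, port 1 down */
            printf("v1->p%d v2->p%d\n", p1, p2);    /* prints v1->p2 v2->p2 */
            return 0;
    }
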
+static void mlx5_activate_lag(struct mlx5_lag *ldev,
+                             struct lag_tracker *tracker)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       int err;
+
+       ldev->flags |= MLX5_LAG_FLAG_BONDED;
+
+       mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
+                                      &ldev->v2p_map[1]);
+
+       err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+       if (err)
+               mlx5_core_err(dev0,
+                             "Failed to create LAG (%d)\n",
+                             err);
+}
+
+static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       int err;
+
+       ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+
+       err = mlx5_cmd_destroy_lag(dev0);
+       if (err)
+               mlx5_core_err(dev0,
+                             "Failed to destroy LAG (%d)\n",
+                             err);
+}
+
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+       struct lag_tracker tracker;
+       u8 v2p_port1, v2p_port2;
+       int i, err;
+
+       if (!dev0 || !dev1)
+               return;
+
+       mutex_lock(&lag_mutex);
+       tracker = ldev->tracker;
+       mutex_unlock(&lag_mutex);
+
+       if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) {
+               if (mlx5_sriov_is_enabled(dev0) ||
+                   mlx5_sriov_is_enabled(dev1)) {
+                       mlx5_core_warn(dev0, "LAG is not supported with SRIOV\n");
+                       return;
+               }
+
+               for (i = 0; i < MLX5_MAX_PORTS; i++)
+                       mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+                                                   MLX5_INTERFACE_PROTOCOL_IB);
+
+               mlx5_activate_lag(ldev, &tracker);
+
+               mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+               mlx5_nic_vport_enable_roce(dev1);
+       } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
+               mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
+                                              &v2p_port2);
+
+               if ((v2p_port1 != ldev->v2p_map[0]) ||
+                   (v2p_port2 != ldev->v2p_map[1])) {
+                       ldev->v2p_map[0] = v2p_port1;
+                       ldev->v2p_map[1] = v2p_port2;
+
+                       err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+                       if (err)
+                               mlx5_core_err(dev0,
+                                             "Failed to modify LAG (%d)\n",
+                                             err);
+               }
+       } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
+               mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+               mlx5_nic_vport_disable_roce(dev1);
+
+               mlx5_deactivate_lag(ldev);
+
+               for (i = 0; i < MLX5_MAX_PORTS; i++)
+                       if (ldev->pf[i].dev)
+                               mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+                                                        MLX5_INTERFACE_PROTOCOL_IB);
+       }
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+       schedule_delayed_work(&ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+       struct delayed_work *delayed_work = to_delayed_work(work);
+       struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+                                            bond_work);
+       int status;
+
+       status = mlx5_dev_list_trylock();
+       if (!status) {
+               /* 1 sec delay. */
+               mlx5_queue_bond_work(ldev, HZ);
+               return;
+       }
+
+       mlx5_do_bond(ldev);
+       mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+                                        struct lag_tracker *tracker,
+                                        struct net_device *ndev,
+                                        struct netdev_notifier_changeupper_info *info)
+{
+       struct net_device *upper = info->upper_dev, *ndev_tmp;
+       struct netdev_lag_upper_info *lag_upper_info;
+       bool is_bonded;
+       int bond_status = 0;
+       int num_slaves = 0;
+       int idx;
+
+       if (!netif_is_lag_master(upper))
+               return 0;
+
+       lag_upper_info = info->upper_info;
+
+       /* The event may still be of interest if the slave does not belong to
+        * us, but is enslaved to a master which has one or more of our netdevs
+        * as slaves (e.g., if a new slave is added to a master that bonds two
+        * of our netdevs, we should unbond).
+        */
+       rcu_read_lock();
+       for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+               idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+               if (idx > -1)
+                       bond_status |= (1 << idx);
+
+               num_slaves++;
+       }
+       rcu_read_unlock();
+
+       /* None of this lagdev's netdevs are slaves of this master. */
+       if (!(bond_status & 0x3))
+               return 0;
+
+       if (lag_upper_info)
+               tracker->tx_type = lag_upper_info->tx_type;
+
+       /* Determine bonding status:
+        * A device is considered bonded if both its physical ports are slaves
+        * of the same lag master, and only them.
+        * Lag mode must be activebackup or hash.
+        */
+       is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
+                   (bond_status == 0x3) &&
+                   ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
+                    (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+
+       if (tracker->is_bonded != is_bonded) {
+               tracker->is_bonded = is_bonded;
+               return 1;
+       }
+
+       return 0;
+}
+
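
The changeupper handler above reduces the enslavement picture to a bitmask: bit i of bond_status is set for each of our netdevs found under the upper device. A device then counts as bonded only when exactly its two ports, and no foreign slaves, sit under one LAG master in activebackup or hash mode. The predicate, restated standalone:

    #include <stdbool.h>

    #define MAX_PORTS 2

    /* idx[] holds, per slave under the master, our port index or -1
     * for netdevs that are not ours. */
    static bool lag_is_bonded(const int *idx, int num_slaves, bool mode_ok)
    {
            int bond_status = 0, i;

            for (i = 0; i < num_slaves; i++)
                    if (idx[i] > -1)
                            bond_status |= 1 << idx[i];

            /* Both our ports enslaved (0x3), nothing else, sane tx mode. */
            return num_slaves == MAX_PORTS && bond_status == 0x3 && mode_ok;
    }
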
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+                                             struct lag_tracker *tracker,
+                                             struct net_device *ndev,
+                                             struct netdev_notifier_changelowerstate_info *info)
+{
+       struct netdev_lag_lower_state_info *lag_lower_info;
+       int idx;
+
+       if (!netif_is_lag_port(ndev))
+               return 0;
+
+       idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+       if (idx == -1)
+               return 0;
+
+       /* This information is used to determine virtual to physical
+        * port mapping.
+        */
+       lag_lower_info = info->lower_state_info;
+       if (!lag_lower_info)
+               return 0;
+
+       tracker->netdev_state[idx] = *lag_lower_info;
+
+       return 1;
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+                                unsigned long event, void *ptr)
+{
+       struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+       struct lag_tracker tracker;
+       struct mlx5_lag *ldev;
+       int changed = 0;
+
+       if (!net_eq(dev_net(ndev), &init_net))
+               return NOTIFY_DONE;
+
+       if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
+               return NOTIFY_DONE;
+
+       ldev    = container_of(this, struct mlx5_lag, nb);
+       tracker = ldev->tracker;
+
+       switch (event) {
+       case NETDEV_CHANGEUPPER:
+               changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
+                                                       ptr);
+               break;
+       case NETDEV_CHANGELOWERSTATE:
+               changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+                                                            ndev, ptr);
+               break;
+       }
+
+       mutex_lock(&lag_mutex);
+       ldev->tracker = tracker;
+       mutex_unlock(&lag_mutex);
+
+       if (changed)
+               mlx5_queue_bond_work(ldev, 0);
+
+       return NOTIFY_DONE;
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+{
+       struct mlx5_lag *ldev;
+
+       ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+       if (!ldev)
+               return NULL;
+
+       INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+       return ldev;
+}
+
+static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+{
+       kfree(ldev);
+}
+
+static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+                               struct mlx5_core_dev *dev,
+                               struct net_device *netdev)
+{
+       unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+
+       if (fn >= MLX5_MAX_PORTS)
+               return;
+
+       mutex_lock(&lag_mutex);
+       ldev->pf[fn].dev    = dev;
+       ldev->pf[fn].netdev = netdev;
+       ldev->tracker.netdev_state[fn].link_up = 0;
+       ldev->tracker.netdev_state[fn].tx_enabled = 0;
+
+       dev->priv.lag = ldev;
+       mutex_unlock(&lag_mutex);
+}
+
+static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
+                                  struct mlx5_core_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < MLX5_MAX_PORTS; i++)
+               if (ldev->pf[i].dev == dev)
+                       break;
+
+       if (i == MLX5_MAX_PORTS)
+               return;
+
+       mutex_lock(&lag_mutex);
+       memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
+
+       dev->priv.lag = NULL;
+       mutex_unlock(&lag_mutex);
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+{
+       struct mlx5_lag *ldev = NULL;
+       struct mlx5_core_dev *tmp_dev;
+
+       if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+           !MLX5_CAP_GEN(dev, lag_master) ||
+           (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+               return;
+
+       tmp_dev = mlx5_get_next_phys_dev(dev);
+       if (tmp_dev)
+               ldev = tmp_dev->priv.lag;
+
+       if (!ldev) {
+               ldev = mlx5_lag_dev_alloc();
+               if (!ldev) {
+                       mlx5_core_err(dev, "Failed to alloc lag dev\n");
+                       return;
+               }
+       }
+
+       mlx5_lag_dev_add_pf(ldev, dev, netdev);
+
+       if (!ldev->nb.notifier_call) {
+               ldev->nb.notifier_call = mlx5_lag_netdev_event;
+               if (register_netdevice_notifier(&ldev->nb)) {
+                       ldev->nb.notifier_call = NULL;
+                       mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+               }
+       }
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       int i;
+
+       ldev = mlx5_lag_dev_get(dev);
+       if (!ldev)
+               return;
+
+       if (mlx5_lag_is_bonded(ldev))
+               mlx5_deactivate_lag(ldev);
+
+       mlx5_lag_dev_remove_pf(ldev, dev);
+
+       for (i = 0; i < MLX5_MAX_PORTS; i++)
+               if (ldev->pf[i].dev)
+                       break;
+
+       if (i == MLX5_MAX_PORTS) {
+               if (ldev->nb.notifier_call)
+                       unregister_netdevice_notifier(&ldev->nb);
+               cancel_delayed_work_sync(&ldev->bond_work);
+               mlx5_lag_dev_free(ldev);
+       }
+}
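
Because the mlx5_lag object is shared between the two PFs, removal behaves like reference counting: detach this PF, then free only once no PF slot is still populated. A condensed restatement of that last-one-out check (illustrative, helper name hypothetical):

static bool ldev_has_pfs(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev)
                        return true;    /* some PF is still attached */
        return false;
}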
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       mutex_lock(&lag_mutex);
+       ldev = mlx5_lag_dev_get(dev);
+       res  = ldev && mlx5_lag_is_bonded(ldev);
+       mutex_unlock(&lag_mutex);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+       struct net_device *ndev = NULL;
+       struct mlx5_lag *ldev;
+
+       mutex_lock(&lag_mutex);
+       ldev = mlx5_lag_dev_get(dev);
+
+       if (!(ldev && mlx5_lag_is_bonded(ldev)))
+               goto unlock;
+
+       if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+               ndev = ldev->tracker.netdev_state[0].tx_enabled ?
+                      ldev->pf[0].netdev : ldev->pf[1].netdev;
+       } else {
+               ndev = ldev->pf[0].netdev;
+       }
+       if (ndev)
+               dev_hold(ndev);
+
+unlock:
+       mutex_unlock(&lag_mutex);
+
+       return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
+                                                priv);
+       struct mlx5_lag *ldev;
+
+       if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
+               return true;
+
+       ldev = mlx5_lag_dev_get(dev);
+       if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+               return true;
+
+       /* If bonded, we do not add an IB device for PF1. */
+       return false;
+}
+
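The intended effect is that a bonded pair exposes a single IB device, created on top of PF0. A hypothetical caller in the interface-attach path (dev_ctx/intf/priv names assumed, not taken from this hunk) would look like:

/* Skip the IB add() callback for PF1 while the LAG is bonded. */
if (!mlx5_lag_intf_add(intf, priv))
        return;

dev_ctx->context = intf->add(dev);      /* PF0 carries the bond */
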
index 1368dac..3a3b000 100644 (file)
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                      u16 opmod, u8 port)
 {
-       struct mlx5_mad_ifc_mbox_in *in = NULL;
-       struct mlx5_mad_ifc_mbox_out *out = NULL;
-       int err;
+       int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+       int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+       int err = -ENOMEM;
+       void *data;
+       void *resp;
+       u32 *out;
+       u32 *in;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       out = kzalloc(sizeof(*out), GFP_KERNEL);
-       if (!out) {
-               err = -ENOMEM;
+       in = kzalloc(inlen, GFP_KERNEL);
+       out = kzalloc(outlen, GFP_KERNEL);
+       if (!in || !out)
                goto out;
-       }
 
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
-       in->hdr.opmod = cpu_to_be16(opmod);
-       in->port = port;
+       MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+       MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+       MLX5_SET(mad_ifc_in, in, port, port);
 
-       memcpy(in->data, inb, sizeof(in->data));
+       data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+       memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
 
-       err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
        if (err)
                goto out;
 
-       if (out->hdr.status) {
-               err = mlx5_cmd_status_to_err(&out->hdr);
-               goto out;
-       }
-
-       memcpy(outb, out->data, sizeof(out->data));
+       resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+       memcpy(outb, resp,
+              MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
 
 out:
        kfree(out);
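
Throughout this series, hand-written big-endian mailbox structs give way to the auto-generated mlx5_ifc layouts, accessed through the MLX5_SET/MLX5_GET/MLX5_ADDR_OF macros, which handle field offsets and byte order. The pattern, condensed (illustrative only):

u32 in[MLX5_ST_SZ_DW(mad_ifc_in)] = {0};        /* size taken from the layout */
void *mad;

MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);  /* sets and byte-swaps */
mad = MLX5_ADDR_OF(mad_ifc_in, in, mad);        /* pointer into a sub-field */
memcpy(mad, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
/* ... then mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); */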
index 4f491d4..d9c3c70 100644 (file)
@@ -72,16 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF;
 module_param_named(prof_sel, prof_sel, int, 0444);
 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
 
-static LIST_HEAD(intf_list);
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(intf_mutex);
-
-struct mlx5_device_context {
-       struct list_head        list;
-       struct mlx5_interface  *intf;
-       void                   *context;
-};
-
 enum {
        MLX5_ATOMIC_REQ_MODE_BE = 0x0,
        MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
@@ -324,7 +314,7 @@ enum {
                                MLX5_DEV_CAP_FLAG_DCT,
 };
 
-static u16 to_fw_pkey_sz(u32 size)
+static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
 {
        switch (size) {
        case 128:
@@ -340,7 +330,7 @@ static u16 to_fw_pkey_sz(u32 size)
        case 4096:
                return 5;
        default:
-               pr_warn("invalid pkey table size %d\n", size);
+               mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
                return 0;
        }
 }
@@ -363,10 +353,6 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
        MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
        MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
-       if (err)
-               goto query_ex;
-
-       err = mlx5_cmd_status_to_err_v2(out);
        if (err) {
                mlx5_core_warn(dev,
                               "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
@@ -409,20 +395,11 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
 
 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
-       u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
-       int err;
-
-       memset(out, 0, sizeof(out));
+       u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
 
        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
        MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
-       err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-       if (err)
-               return err;
-
-       err = mlx5_cmd_status_to_err_v2(out);
-
-       return err;
+       return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
 }
 
 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
@@ -490,7 +467,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
                      128);
        /* we limit the size of the pkey table to 128 entries for now */
        MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
-                to_fw_pkey_sz(128));
+                to_fw_pkey_sz(dev, 128));
 
        if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
                MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
@@ -528,37 +505,22 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
 
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-       u32 out[MLX5_ST_SZ_DW(enable_hca_out)];
-       u32 in[MLX5_ST_SZ_DW(enable_hca_in)];
-       int err;
+       u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
        MLX5_SET(enable_hca_in, in, function_id, func_id);
-       memset(out, 0, sizeof(out));
-
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       return mlx5_cmd_status_to_err_v2(out);
+       return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-       u32 out[MLX5_ST_SZ_DW(disable_hca_out)];
-       u32 in[MLX5_ST_SZ_DW(disable_hca_in)];
-       int err;
+       u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
        MLX5_SET(disable_hca_in, in, function_id, func_id);
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-       if (err)
-               return err;
-
-       return mlx5_cmd_status_to_err_v2(out);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
@@ -758,44 +720,40 @@ clean:
 
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
-       u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
-       u32 query_out[MLX5_ST_SZ_DW(query_issi_out)];
-       u32 set_in[MLX5_ST_SZ_DW(set_issi_in)];
-       u32 set_out[MLX5_ST_SZ_DW(set_issi_out)];
-       int err;
+       u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
+       u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
        u32 sup_issi;
-
-       memset(query_in, 0, sizeof(query_in));
-       memset(query_out, 0, sizeof(query_out));
+       int err;
 
        MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
-
-       err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in),
-                                        query_out, sizeof(query_out));
+       err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
+                           query_out, sizeof(query_out));
        if (err) {
-               if (((struct mlx5_outbox_hdr *)query_out)->status ==
-                   MLX5_CMD_STAT_BAD_OP_ERR) {
+               u32 syndrome;
+               u8 status;
+
+               mlx5_cmd_mbox_status(query_out, &status, &syndrome);
+               if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
                        pr_debug("Only ISSI 0 is supported\n");
                        return 0;
                }
 
-               pr_err("failed to query ISSI\n");
+               pr_err("failed to query ISSI err(%d)\n", err);
                return err;
        }
 
        sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
 
        if (sup_issi & (1 << 1)) {
-               memset(set_in, 0, sizeof(set_in));
-               memset(set_out, 0, sizeof(set_out));
+               u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]   = {0};
+               u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
 
                MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
                MLX5_SET(set_issi_in, set_in, current_issi, 1);
-
-               err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in),
-                                                set_out, sizeof(set_out));
+               err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
+                                   set_out, sizeof(set_out));
                if (err) {
-                       pr_err("failed to set ISSI=1\n");
+                       pr_err("failed to set ISSI=1 err(%d)\n", err);
                        return err;
                }
 
@@ -809,120 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
        return -ENOTSUPP;
 }
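
Note the error handling here: with mlx5_cmd_exec() now folding the mailbox status into its return value, callers that care which status came back probe it explicitly. A condensed sketch of that probe, assuming in/out mailboxes as in the query path above:

u32 syndrome;
u8 status;

err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err) {
        mlx5_cmd_mbox_status(out, &status, &syndrome);
        if (status == MLX5_CMD_STAT_BAD_OP_ERR)
                return 0;       /* old firmware: command unknown, fall back */
        return err;
}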
 
-static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-       struct mlx5_device_context *dev_ctx;
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-       dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
-       if (!dev_ctx)
-               return;
-
-       dev_ctx->intf    = intf;
-       dev_ctx->context = intf->add(dev);
-
-       if (dev_ctx->context) {
-               spin_lock_irq(&priv->ctx_lock);
-               list_add_tail(&dev_ctx->list, &priv->ctx_list);
-               spin_unlock_irq(&priv->ctx_lock);
-       } else {
-               kfree(dev_ctx);
-       }
-}
-
-static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-       struct mlx5_device_context *dev_ctx;
-       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-               if (dev_ctx->intf == intf) {
-                       spin_lock_irq(&priv->ctx_lock);
-                       list_del(&dev_ctx->list);
-                       spin_unlock_irq(&priv->ctx_lock);
-
-                       intf->remove(dev, dev_ctx->context);
-                       kfree(dev_ctx);
-                       return;
-               }
-}
-
-static int mlx5_register_device(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_interface *intf;
-
-       mutex_lock(&intf_mutex);
-       list_add_tail(&priv->dev_list, &dev_list);
-       list_for_each_entry(intf, &intf_list, list)
-               mlx5_add_device(intf, priv);
-       mutex_unlock(&intf_mutex);
-
-       return 0;
-}
-
-static void mlx5_unregister_device(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_interface *intf;
-
-       mutex_lock(&intf_mutex);
-       list_for_each_entry(intf, &intf_list, list)
-               mlx5_remove_device(intf, priv);
-       list_del(&priv->dev_list);
-       mutex_unlock(&intf_mutex);
-}
-
-int mlx5_register_interface(struct mlx5_interface *intf)
-{
-       struct mlx5_priv *priv;
-
-       if (!intf->add || !intf->remove)
-               return -EINVAL;
-
-       mutex_lock(&intf_mutex);
-       list_add_tail(&intf->list, &intf_list);
-       list_for_each_entry(priv, &dev_list, dev_list)
-               mlx5_add_device(intf, priv);
-       mutex_unlock(&intf_mutex);
-
-       return 0;
-}
-EXPORT_SYMBOL(mlx5_register_interface);
-
-void mlx5_unregister_interface(struct mlx5_interface *intf)
-{
-       struct mlx5_priv *priv;
-
-       mutex_lock(&intf_mutex);
-       list_for_each_entry(priv, &dev_list, dev_list)
-               mlx5_remove_device(intf, priv);
-       list_del(&intf->list);
-       mutex_unlock(&intf_mutex);
-}
-EXPORT_SYMBOL(mlx5_unregister_interface);
-
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
-       struct mlx5_priv *priv = &mdev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-       void *result = NULL;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
-               if ((dev_ctx->intf->protocol == protocol) &&
-                   dev_ctx->intf->get_dev) {
-                       result = dev_ctx->intf->get_dev(dev_ctx->context);
-                       break;
-               }
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-       return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
 
 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
@@ -995,8 +839,102 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
        debugfs_remove(priv->dbg_root);
 }
 
-#define MLX5_IB_MOD "mlx5_ib"
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+       struct pci_dev *pdev = dev->pdev;
+       int err;
+
+       err = mlx5_query_hca_caps(dev);
+       if (err) {
+               dev_err(&pdev->dev, "query hca failed\n");
+               goto out;
+       }
+
+       err = mlx5_query_board_id(dev);
+       if (err) {
+               dev_err(&pdev->dev, "query board id failed\n");
+               goto out;
+       }
+
+       err = mlx5_eq_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to initialize eq\n");
+               goto out;
+       }
+
+       MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
+
+       err = mlx5_init_cq_table(dev);
+       if (err) {
+               dev_err(&pdev->dev, "failed to initialize cq table\n");
+               goto err_eq_cleanup;
+       }
+
+       mlx5_init_qp_table(dev);
+
+       mlx5_init_srq_table(dev);
+
+       mlx5_init_mkey_table(dev);
+
+       err = mlx5_init_rl_table(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init rate limiting\n");
+               goto err_tables_cleanup;
+       }
+
+#ifdef CONFIG_MLX5_CORE_EN
+       err = mlx5_eswitch_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+               goto err_rl_cleanup;
+       }
+#endif
+
+       err = mlx5_sriov_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+               goto err_eswitch_cleanup;
+       }
+
+       return 0;
+
+err_eswitch_cleanup:
+#ifdef CONFIG_MLX5_CORE_EN
+       mlx5_eswitch_cleanup(dev->priv.eswitch);
+
+err_rl_cleanup:
+#endif
+       mlx5_cleanup_rl_table(dev);
+
+err_tables_cleanup:
+       mlx5_cleanup_mkey_table(dev);
+       mlx5_cleanup_srq_table(dev);
+       mlx5_cleanup_qp_table(dev);
+       mlx5_cleanup_cq_table(dev);
+
+err_eq_cleanup:
+       mlx5_eq_cleanup(dev);
+
+out:
+       return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+       mlx5_sriov_cleanup(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+       mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
+       mlx5_cleanup_rl_table(dev);
+       mlx5_cleanup_mkey_table(dev);
+       mlx5_cleanup_srq_table(dev);
+       mlx5_cleanup_qp_table(dev);
+       mlx5_cleanup_cq_table(dev);
+       mlx5_eq_cleanup(dev);
+}
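
The init_once/cleanup_once split is what makes the new boot flag work: software objects (EQ/CQ/QP/SRQ/mkey tables, eswitch and SR-IOV state) are created once at probe and survive PCI error recovery, while mlx5_load_one()/mlx5_unload_one() manage only the per-load hardware state. The resulting lifecycle, restated from the call sites in this patch:

/* probe:       mlx5_load_one(dev, priv, true);    - mlx5_init_once() runs
 * PCI error:   mlx5_unload_one(dev, priv, false); - SW objects survive
 * PCI resume:  mlx5_load_one(dev, priv, false);   - reattach HW state only
 * remove:      mlx5_unload_one(dev, priv, true);  - mlx5_cleanup_once() runs
 */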
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+                        bool boot)
 {
        struct pci_dev *pdev = dev->pdev;
        int err;
@@ -1029,12 +967,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto out_err;
        }
 
-       mlx5_pagealloc_init(dev);
-
        err = mlx5_core_enable_hca(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "enable hca failed\n");
-               goto err_pagealloc_cleanup;
+               goto err_cmd_cleanup;
        }
 
        err = mlx5_core_set_issi(dev);
@@ -1087,34 +1023,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
        mlx5_start_health_poll(dev);
 
-       err = mlx5_query_hca_caps(dev);
-       if (err) {
-               dev_err(&pdev->dev, "query hca failed\n");
-               goto err_stop_poll;
-       }
-
-       err = mlx5_query_board_id(dev);
-       if (err) {
-               dev_err(&pdev->dev, "query board id failed\n");
+       if (boot && mlx5_init_once(dev, priv)) {
+               dev_err(&pdev->dev, "sw objs init failed\n");
                goto err_stop_poll;
        }
 
        err = mlx5_enable_msix(dev);
        if (err) {
                dev_err(&pdev->dev, "enable msix failed\n");
-               goto err_stop_poll;
-       }
-
-       err = mlx5_eq_init(dev);
-       if (err) {
-               dev_err(&pdev->dev, "failed to initialize eq\n");
-               goto disable_msix;
+               goto err_cleanup_once;
        }
 
        err = mlx5_alloc_uuars(dev, &priv->uuari);
        if (err) {
                dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
-               goto err_eq_cleanup;
+               goto err_disable_msix;
        }
 
        err = mlx5_start_eqs(dev);
@@ -1130,15 +1053,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
        }
 
        err = mlx5_irq_set_affinity_hints(dev);
-       if (err)
+       if (err) {
                dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-
-       MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
-
-       mlx5_init_cq_table(dev);
-       mlx5_init_qp_table(dev);
-       mlx5_init_srq_table(dev);
-       mlx5_init_mkey_table(dev);
+               goto err_affinity_hints;
+       }
 
        err = mlx5_init_fs(dev);
        if (err) {
@@ -1146,36 +1064,26 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto err_fs;
        }
 
-       err = mlx5_init_rl_table(dev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to init rate limiting\n");
-               goto err_rl;
-       }
-
 #ifdef CONFIG_MLX5_CORE_EN
-       err = mlx5_eswitch_init(dev);
-       if (err) {
-               dev_err(&pdev->dev, "eswitch init failed %d\n", err);
-               goto err_reg_dev;
-       }
+       mlx5_eswitch_attach(dev->priv.eswitch);
 #endif
 
-       err = mlx5_sriov_init(dev);
+       err = mlx5_sriov_attach(dev);
        if (err) {
                dev_err(&pdev->dev, "sriov init failed %d\n", err);
                goto err_sriov;
        }
 
-       err = mlx5_register_device(dev);
-       if (err) {
-               dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
-               goto err_reg_dev;
+       if (mlx5_device_registered(dev)) {
+               mlx5_attach_device(dev);
+       } else {
+               err = mlx5_register_device(dev);
+               if (err) {
+                       dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+                       goto err_reg_dev;
+               }
        }
 
-       err = request_module_nowait(MLX5_IB_MOD);
-       if (err)
-               pr_info("failed request module on %s\n", MLX5_IB_MOD);
-
        clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 out:
@@ -1183,23 +1091,19 @@ out:
 
        return 0;
 
-err_sriov:
-       if (mlx5_sriov_cleanup(dev))
-               dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
+err_reg_dev:
+       mlx5_sriov_detach(dev);
 
+err_sriov:
 #ifdef CONFIG_MLX5_CORE_EN
-       mlx5_eswitch_cleanup(dev->priv.eswitch);
+       mlx5_eswitch_detach(dev->priv.eswitch);
 #endif
-err_reg_dev:
-       mlx5_cleanup_rl_table(dev);
-err_rl:
        mlx5_cleanup_fs(dev);
+
 err_fs:
-       mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
-       mlx5_cleanup_qp_table(dev);
-       mlx5_cleanup_cq_table(dev);
        mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
        free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1208,12 +1112,13 @@ err_stop_eqs:
 err_free_uar:
        mlx5_free_uuars(dev, &priv->uuari);
 
-err_eq_cleanup:
-       mlx5_eq_cleanup(dev);
-
-disable_msix:
+err_disable_msix:
        mlx5_disable_msix(dev);
 
+err_cleanup_once:
+       if (boot)
+               mlx5_cleanup_once(dev);
+
 err_stop_poll:
        mlx5_stop_health_poll(dev);
        if (mlx5_cmd_teardown_hca(dev)) {
@@ -1230,8 +1135,7 @@ reclaim_boot_pages:
 err_disable_hca:
        mlx5_core_disable_hca(dev, 0);
 
-err_pagealloc_cleanup:
-       mlx5_pagealloc_cleanup(dev);
+err_cmd_cleanup:
        mlx5_cmd_cleanup(dev);
 
 out_err:
@@ -1241,40 +1145,35 @@ out_err:
        return err;
 }
 
-static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+                          bool cleanup)
 {
        int err = 0;
 
-       err = mlx5_sriov_cleanup(dev);
-       if (err) {
-               dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
-                        __func__);
-               return err;
-       }
-
        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
                dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
                         __func__);
+               if (cleanup)
+                       mlx5_cleanup_once(dev);
                goto out;
        }
-       mlx5_unregister_device(dev);
+
+       if (mlx5_device_registered(dev))
+               mlx5_detach_device(dev);
+
+       mlx5_sriov_detach(dev);
 #ifdef CONFIG_MLX5_CORE_EN
-       mlx5_eswitch_cleanup(dev->priv.eswitch);
+       mlx5_eswitch_detach(dev->priv.eswitch);
 #endif
-
-       mlx5_cleanup_rl_table(dev);
        mlx5_cleanup_fs(dev);
-       mlx5_cleanup_mkey_table(dev);
-       mlx5_cleanup_srq_table(dev);
-       mlx5_cleanup_qp_table(dev);
-       mlx5_cleanup_cq_table(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_free_uuars(dev, &priv->uuari);
-       mlx5_eq_cleanup(dev);
        mlx5_disable_msix(dev);
+       if (cleanup)
+               mlx5_cleanup_once(dev);
        mlx5_stop_health_poll(dev);
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
@@ -1284,7 +1183,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
        mlx5_pagealloc_stop(dev);
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev, 0);
-       mlx5_pagealloc_cleanup(dev);
        mlx5_cmd_cleanup(dev);
 
 out:
@@ -1294,22 +1192,6 @@ out:
        return err;
 }
 
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-                    unsigned long param)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_device_context *dev_ctx;
-       unsigned long flags;
-
-       spin_lock_irqsave(&priv->ctx_lock, flags);
-
-       list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-               if (dev_ctx->intf->event)
-                       dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
-       spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
 struct mlx5_core_event_handler {
        void (*event)(struct mlx5_core_dev *dev,
                      enum mlx5_dev_event event,
@@ -1323,6 +1205,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
 #endif
 };
 
+#define MLX5_IB_MOD "mlx5_ib"
 static int init_one(struct pci_dev *pdev,
                    const struct pci_device_id *id)
 {
@@ -1344,8 +1227,9 @@ static int init_one(struct pci_dev *pdev,
        pci_set_drvdata(pdev, dev);
 
        if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
-               pr_warn("selected profile out of range, selecting default (%d)\n",
-                       MLX5_DEFAULT_PROF);
+               mlx5_core_warn(dev,
+                              "selected profile out of range, selecting default (%d)\n",
+                              MLX5_DEFAULT_PROF);
                prof_sel = MLX5_DEFAULT_PROF;
        }
        dev->profile = &profile[prof_sel];
@@ -1368,12 +1252,18 @@ static int init_one(struct pci_dev *pdev,
                goto close_pci;
        }
 
-       err = mlx5_load_one(dev, priv);
+       mlx5_pagealloc_init(dev);
+
+       err = mlx5_load_one(dev, priv, true);
        if (err) {
                dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
                goto clean_health;
        }
 
+       err = request_module_nowait(MLX5_IB_MOD);
+       if (err)
+               pr_info("failed to request module %s\n", MLX5_IB_MOD);
+
        err = devlink_register(devlink, &pdev->dev);
        if (err)
                goto clean_load;
@@ -1381,8 +1271,9 @@ static int init_one(struct pci_dev *pdev,
        return 0;
 
 clean_load:
-       mlx5_unload_one(dev, priv);
+       mlx5_unload_one(dev, priv, true);
 clean_health:
+       mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
 close_pci:
        mlx5_pci_close(dev, priv);
@@ -1400,11 +1291,15 @@ static void remove_one(struct pci_dev *pdev)
        struct mlx5_priv *priv = &dev->priv;
 
        devlink_unregister(devlink);
-       if (mlx5_unload_one(dev, priv)) {
+       mlx5_unregister_device(dev);
+
+       if (mlx5_unload_one(dev, priv, true)) {
                dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
                mlx5_health_cleanup(dev);
                return;
        }
+
+       mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
        mlx5_pci_close(dev, priv);
        pci_set_drvdata(pdev, NULL);
@@ -1419,37 +1314,13 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
        dev_info(&pdev->dev, "%s was called\n", __func__);
        mlx5_enter_error_state(dev);
-       mlx5_unload_one(dev, priv);
+       mlx5_unload_one(dev, priv, false);
+       pci_save_state(pdev);
        mlx5_pci_disable_device(dev);
        return state == pci_channel_io_perm_failure ?
                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
 }
 
-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
-{
-       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-       int err = 0;
-
-       dev_info(&pdev->dev, "%s was called\n", __func__);
-
-       err = mlx5_pci_enable_device(dev);
-       if (err) {
-               dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
-                       , __func__, err);
-               return PCI_ERS_RESULT_DISCONNECT;
-       }
-       pci_set_master(pdev);
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-
-       return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
-}
-
-void mlx5_disable_device(struct mlx5_core_dev *dev)
-{
-       mlx5_pci_err_detected(dev->pdev, 0);
-}
-
 /* Wait for the device to show vital signs, i.e. for the health
  * counter to start counting.
  */
@@ -1477,22 +1348,45 @@ static int wait_vital(struct pci_dev *pdev)
        return -ETIMEDOUT;
 }
 
-static void mlx5_pci_resume(struct pci_dev *pdev)
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-       struct mlx5_priv *priv = &dev->priv;
        int err;
 
        dev_info(&pdev->dev, "%s was called\n", __func__);
 
-       pci_save_state(pdev);
-       err = wait_vital(pdev);
+       err = mlx5_pci_enable_device(dev);
        if (err) {
+               dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+                       , __func__, err);
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       pci_set_master(pdev);
+       pci_restore_state(pdev);
+
+       if (wait_vital(pdev)) {
                dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
-               return;
+               return PCI_ERS_RESULT_DISCONNECT;
        }
 
-       err = mlx5_load_one(dev, priv);
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+static void mlx5_pci_resume(struct pci_dev *pdev)
+{
+       struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+       struct mlx5_priv *priv = &dev->priv;
+       int err;
+
+       dev_info(&pdev->dev, "%s was called\n", __func__);
+
+       err = mlx5_load_one(dev, priv, false);
        if (err)
                dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
                        , __func__, err);
@@ -1514,7 +1408,7 @@ static void shutdown(struct pci_dev *pdev)
        dev_info(&pdev->dev, "Shutdown was called\n");
        /* Notify mlx5 clients that the kernel is being shut down */
        set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
-       mlx5_unload_one(dev, priv);
+       mlx5_unload_one(dev, priv, false);
        mlx5_pci_disable_device(dev);
 }
 
index d5a0c2d..ba2b09c 100644 (file)
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
 
-struct mlx5_attach_mcg_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       __be32                  rsvd;
-       u8                      gid[16];
-};
-
-struct mlx5_attach_mcg_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvf[8];
-};
-
-struct mlx5_detach_mcg_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       __be32                  rsvd;
-       u8                      gid[16];
-};
-
-struct mlx5_detach_mcg_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvf[8];
-};
-
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-       struct mlx5_attach_mcg_mbox_in in;
-       struct mlx5_attach_mcg_mbox_out out;
-       int err;
+       u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {0};
+       void *gid;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
-       memcpy(in.gid, mgid, sizeof(*mgid));
-       in.qpn = cpu_to_be32(qpn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+       MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+       gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+       memcpy(gid, mgid, sizeof(*mgid));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_attach_mcg);
 
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-       struct mlx5_detach_mcg_mbox_in in;
-       struct mlx5_detach_mcg_mbox_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG);
-       memcpy(in.gid, mgid, sizeof(*mgid));
-       in.qpn = cpu_to_be32(qpn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
+       u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {0};
+       void *gid;
 
-       return err;
+       MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+       MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+       gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+       memcpy(gid, mgid, sizeof(*mgid));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_detach_mcg);
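
Behavior is unchanged: both helpers still build an ATTACH_TO_MCG/DETACH_FROM_MCG mailbox around a GID and QP number. A hypothetical caller joining and later leaving a multicast group:

union ib_gid mgid = {};                 /* group GID, filled by the caller */
int err;

err = mlx5_core_attach_mcg(dev, &mgid, qpn);
if (err)
        return err;
/* ... QP now receives traffic for mgid ... */
err = mlx5_core_detach_mcg(dev, &mgid, qpn);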
index 2f86ec6..3d0cfb9 100644 (file)
@@ -58,8 +58,8 @@ do {                                                                  \
 } while (0)
 
 #define mlx5_core_err(__dev, format, ...)                              \
-       dev_err(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format,      \
-              (__dev)->priv.name, __func__, __LINE__, current->pid,    \
+       dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \
+               __func__, __LINE__, current->pid,       \
               ##__VA_ARGS__)
 
 #define mlx5_core_warn(__dev, format, ...)                             \
@@ -75,19 +75,6 @@ enum {
        MLX5_CMD_TIME, /* print command execution time */
 };
 
-static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in,
-                                            int in_size, u32 *out,
-                                            int out_size)
-{
-       int err;
-
-       err = mlx5_cmd_exec(dev, in, in_size, out, out_size);
-       if (err)
-               return err;
-
-       return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out);
-}
-
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
@@ -96,7 +83,12 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
                     unsigned long param);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
@@ -105,7 +97,38 @@ u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 void mlx5_cq_tasklet_cb(unsigned long data);
 
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_attach_device(struct mlx5_core_dev *dev);
+void mlx5_detach_device(struct mlx5_core_dev *dev);
+bool mlx5_device_registered(struct mlx5_core_dev *dev);
+int mlx5_register_device(struct mlx5_core_dev *dev);
+void mlx5_unregister_device(struct mlx5_core_dev *dev);
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+void mlx5_dev_list_lock(void);
+void mlx5_dev_list_unlock(void);
+int mlx5_dev_list_trylock(void);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
+
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
 
+static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
+{
+       /* LACP owner conditions:
+        * 1) Function is physical.
+        * 2) LAG is supported by FW.
+        * 3) LAG is managed by driver (currently the only option).
+        */
+       return  MLX5_CAP_GEN(dev, vport_group_manager) &&
+                  (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
+                   MLX5_CAP_GEN(dev, lag_master);
+}
+
 #endif /* __MLX5_CORE_H__ */
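
A hypothetical consumer of the new helper (the call site is not part of this hunk): only the LACP owner should let LACP frames reach the kernel bonding driver rather than consuming them in firmware:

if (mlx5_lag_is_lacp_owner(dev))
        mlx5_enable_lacp_passthrough(dev);      /* hypothetical helper */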
index 77a7293..b9736f5 100644 (file)
@@ -49,48 +49,43 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
 {
 }
 
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-                         struct mlx5_core_mkey *mkey,
-                         struct mlx5_create_mkey_mbox_in *in, int inlen,
-                         mlx5_cmd_cbk_t callback, void *context,
-                         struct mlx5_create_mkey_mbox_out *out)
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+                            struct mlx5_core_mkey *mkey,
+                            u32 *in, int inlen,
+                            u32 *out, int outlen,
+                            mlx5_cmd_cbk_t callback, void *context)
 {
        struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-       struct mlx5_create_mkey_mbox_out lout;
+       u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+       u32 mkey_index;
+       void *mkc;
        int err;
        u8 key;
 
-       memset(&lout, 0, sizeof(lout));
        spin_lock_irq(&dev->priv.mkey_lock);
        key = dev->priv.mkey_key++;
        spin_unlock_irq(&dev->priv.mkey_lock);
-       in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
-       if (callback) {
-               err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
-                                      callback, context);
-               return err;
-       } else {
-               err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
-       }
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-       if (err) {
-               mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
-               return err;
-       }
+       MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+       MLX5_SET(mkc, mkc, mkey_7_0, key);
 
-       if (lout.hdr.status) {
-               mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
-               return mlx5_cmd_status_to_err(&lout.hdr);
-       }
+       if (callback)
+               return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+                                       callback, context);
+
+       err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
+       if (err)
+               return err;
 
-       mkey->iova = be64_to_cpu(in->seg.start_addr);
-       mkey->size = be64_to_cpu(in->seg.len);
-       mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
-       mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+       mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+       mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+       mkey->size = MLX5_GET64(mkc, mkc, len);
+       mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
+       mkey->pd = MLX5_GET(mkc, mkc, pd);
 
        mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
-                     be32_to_cpu(lout.mkey), key, mkey->key);
+                     mkey_index, key, mkey->key);
 
        /* connect to mkey tree */
        write_lock_irq(&table->lock);
@@ -104,20 +99,25 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 
        return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+                         struct mlx5_core_mkey *mkey,
+                         u32 *in, int inlen)
+{
+       return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+                                       NULL, 0, NULL, NULL);
+}
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
                           struct mlx5_core_mkey *mkey)
 {
        struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-       struct mlx5_destroy_mkey_mbox_in in;
-       struct mlx5_destroy_mkey_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
        struct mlx5_core_mkey *deleted_mkey;
        unsigned long flags;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
 
        write_lock_irqsave(&table->lock, flags);
        deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
@@ -128,94 +128,71 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
                return -ENOENT;
        }
 
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
-       in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+       MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
 
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-                        struct mlx5_query_mkey_mbox_out *out, int outlen)
+                        u32 *out, int outlen)
 {
-       struct mlx5_query_mkey_mbox_in in;
-       int err;
+       u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
 
-       memset(&in, 0, sizeof(in));
        memset(out, 0, outlen);
-
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
-       in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-       if (err)
-               return err;
-
-       if (out->hdr.status)
-               return mlx5_cmd_status_to_err(&out->hdr);
-
-       return err;
+       MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
+       MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
 
 int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
                             u32 *mkey)
 {
-       struct mlx5_query_special_ctxs_mbox_in in;
-       struct mlx5_query_special_ctxs_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
        int err;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       *mkey = be32_to_cpu(out.dump_fill_mkey);
-
+       MLX5_SET(query_special_contexts_in, in, opcode,
+                MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *mkey = MLX5_GET(query_special_contexts_out, out,
+                                dump_fill_mkey);
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
 
+static inline u32 mlx5_get_psv(u32 *out, int psv_index)
+{
+       switch (psv_index) {
+       case 1: return MLX5_GET(create_psv_out, out, psv1_index);
+       case 2: return MLX5_GET(create_psv_out, out, psv2_index);
+       case 3: return MLX5_GET(create_psv_out, out, psv3_index);
+       default: return MLX5_GET(create_psv_out, out, psv0_index);
+       }
+}
+
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
                         int npsvs, u32 *sig_index)
 {
-       struct mlx5_allocate_psv_in in;
-       struct mlx5_allocate_psv_out out;
+       u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(create_psv_in)]   = {0};
        int i, err;
 
        if (npsvs > MLX5_MAX_PSVS)
                return -EINVAL;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
+       MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV);
+       MLX5_SET(create_psv_in, in, pd, pdn);
+       MLX5_SET(create_psv_in, in, num_psv, npsvs);
 
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV);
-       in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err) {
-               mlx5_core_err(dev, "cmd exec failed %d\n", err);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (err)
                return err;
-       }
-
-       if (out.hdr.status) {
-               mlx5_core_err(dev, "create_psv bad status %d\n",
-                             out.hdr.status);
-               return mlx5_cmd_status_to_err(&out.hdr);
-       }
 
        for (i = 0; i < npsvs; i++)
-               sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff;
+               sig_index[i] = mlx5_get_psv(out, i);
 
        return err;
 }
@@ -223,29 +200,11 @@ EXPORT_SYMBOL(mlx5_core_create_psv);
 
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
 {
-       struct mlx5_destroy_psv_in in;
-       struct mlx5_destroy_psv_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
+       u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_psv_in)]   = {0};
 
-       in.psv_number = cpu_to_be32(psv_num);
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err) {
-               mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err);
-               goto out;
-       }
-
-       if (out.hdr.status) {
-               mlx5_core_err(dev, "destroy_psv bad status %d\n",
-                             out.hdr.status);
-               err = mlx5_cmd_status_to_err(&out.hdr);
-               goto out;
-       }
-
-out:
-       return err;
+       MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
+       MLX5_SET(destroy_psv_in, in, psvn, psv_num);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_psv);
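
The API surface is unchanged; a hypothetical signature-offload setup allocating two PSVs against a PD and releasing them afterwards:

u32 psv_index[2];
int err;

err = mlx5_core_create_psv(dev, pdn, 2, psv_index);
if (err)
        return err;
/* ... program the PSVs into signature MKeys ... */
mlx5_core_destroy_psv(dev, psv_index[0]);
mlx5_core_destroy_psv(dev, psv_index[1]);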
index 32dea35..d458515 100644 (file)
@@ -44,12 +44,6 @@ enum {
        MLX5_PAGES_TAKE         = 2
 };
 
-enum {
-       MLX5_BOOT_PAGES         = 1,
-       MLX5_INIT_PAGES         = 2,
-       MLX5_POST_INIT_PAGES    = 3
-};
-
 struct mlx5_pages_req {
        struct mlx5_core_dev *dev;
        u16     func_id;
@@ -67,33 +61,6 @@ struct fw_page {
        unsigned                free_count;
 };
 
-struct mlx5_query_pages_inbox {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_query_pages_outbox {
-       struct mlx5_outbox_hdr  hdr;
-       __be16                  rsvd;
-       __be16                  func_id;
-       __be32                  num_pages;
-};
-
-struct mlx5_manage_pages_inbox {
-       struct mlx5_inbox_hdr   hdr;
-       __be16                  rsvd;
-       __be16                  func_id;
-       __be32                  num_entries;
-       __be64                  pas[0];
-};
-
-struct mlx5_manage_pages_outbox {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  num_entries;
-       u8                      rsvd[4];
-       __be64                  pas[0];
-};
-
 enum {
        MAX_RECLAIM_TIME_MSECS  = 5000,
        MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
@@ -167,24 +134,21 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
                                s32 *npages, int boot)
 {
-       struct mlx5_query_pages_inbox   in;
-       struct mlx5_query_pages_outbox  out;
+       u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(query_pages_in)]   = {0};
        int err;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
-       in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+       MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+       MLX5_SET(query_pages_in, in, op_mod, boot ?
+                MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+                MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       *npages = be32_to_cpu(out.num_pages);
-       *func_id = be16_to_cpu(out.func_id);
+       *npages = MLX5_GET(query_pages_out, out, num_pages);
+       *func_id = MLX5_GET(query_pages_out, out, function_id);
 
        return err;
 }
@@ -280,46 +244,37 @@ out_alloc:
 
 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 {
-       struct mlx5_manage_pages_inbox *in;
-       struct mlx5_manage_pages_outbox out;
+       u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
        int err;
 
-       in = kzalloc(sizeof(*in), GFP_KERNEL);
-       if (!in)
-               return;
-
-       memset(&out, 0, sizeof(out));
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-       in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
-       in->func_id = cpu_to_be16(func_id);
-       err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
-       if (!err)
-               err = mlx5_cmd_status_to_err(&out.hdr);
+       MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+       MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+       MLX5_SET(manage_pages_in, in, function_id, func_id);
 
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
-               mlx5_core_warn(dev, "page notify failed\n");
-
-       kfree(in);
+               mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+                              func_id, err);
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
                      int notify_fail)
 {
-       struct mlx5_manage_pages_inbox *in;
-       struct mlx5_manage_pages_outbox out;
-       int inlen;
+       u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+       int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
        u64 addr;
        int err;
+       u32 *in;
        int i;
 
-       inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
+       inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
                goto out_free;
        }
-       memset(&out, 0, sizeof(out));
 
        for (i = 0; i < npages; i++) {
 retry:
@@ -332,27 +287,21 @@ retry:
 
                        goto retry;
                }
-               in->pas[i] = cpu_to_be64(addr);
+               MLX5_SET64(manage_pages_in, in, pas[i], addr);
        }
 
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-       in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
-       in->func_id = cpu_to_be16(func_id);
-       in->num_entries = cpu_to_be32(npages);
-       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+       MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+       MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+       MLX5_SET(manage_pages_in, in, function_id, func_id);
+       MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (err) {
                mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
                               func_id, npages, err);
                goto out_4k;
        }
 
-       err = mlx5_cmd_status_to_err(&out.hdr);
-       if (err) {
-               mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
-                              func_id, npages, out.hdr.status);
-               goto out_4k;
-       }
-
        dev->priv.fw_pages += npages;
        if (func_id)
                dev->priv.vfs_pages += npages;
@@ -364,7 +313,7 @@ retry:
 
 out_4k:
        for (i--; i >= 0; i--)
-               free_4k(dev, be64_to_cpu(in->pas[i]));
+               free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
 out_free:
        kvfree(in);
        if (notify_fail)
@@ -373,64 +322,67 @@ out_free:
 }
 
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
-                            struct mlx5_manage_pages_inbox *in, int in_size,
-                            struct mlx5_manage_pages_outbox *out, int out_size)
+                            u32 *in, int in_size, u32 *out, int out_size)
 {
        struct fw_page *fwp;
        struct rb_node *p;
+       u32 func_id;
        u32 npages;
        u32 i = 0;
 
        if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
-               return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
-                                                 (u32 *)out, out_size);
+               return mlx5_cmd_exec(dev, in, in_size, out, out_size);
 
-       npages = be32_to_cpu(in->num_entries);
+       /* No hard feelings, we want our pages back! */
+       npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+       func_id = MLX5_GET(manage_pages_in, in, function_id);
 
        p = rb_first(&dev->priv.page_root);
        while (p && i < npages) {
                fwp = rb_entry(p, struct fw_page, rb_node);
-               out->pas[i] = cpu_to_be64(fwp->addr);
                p = rb_next(p);
+               if (fwp->func_id != func_id)
+                       continue;
+
+               MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
                i++;
        }
 
-       out->num_entries = cpu_to_be32(i);
+       MLX5_SET(manage_pages_out, out, output_num_entries, i);
        return 0;
 }
 
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
                         int *nclaimed)
 {
-       struct mlx5_manage_pages_inbox   in;
-       struct mlx5_manage_pages_outbox *out;
+       int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
+       u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
        int num_claimed;
-       int outlen;
-       u64 addr;
+       u32 *out;
        int err;
        int i;
 
        if (nclaimed)
                *nclaimed = 0;
 
-       memset(&in, 0, sizeof(in));
-       outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
+       outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
        out = mlx5_vzalloc(outlen);
        if (!out)
                return -ENOMEM;
 
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-       in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
-       in.func_id = cpu_to_be16(func_id);
-       in.num_entries = cpu_to_be32(npages);
+       MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+       MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
+       MLX5_SET(manage_pages_in, in, function_id, func_id);
+       MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
        mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-       err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
+       err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
        if (err) {
                mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
                goto out_free;
        }
 
-       num_claimed = be32_to_cpu(out->num_entries);
+       num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
        if (num_claimed > npages) {
                mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
                               num_claimed, npages);
@@ -438,10 +390,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
                goto out_free;
        }
 
-       for (i = 0; i < num_claimed; i++) {
-               addr = be64_to_cpu(out->pas[i]);
-               free_4k(dev, addr);
-       }
+       for (i = 0; i < num_claimed; i++)
+               free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
 
        if (nclaimed)
                *nclaimed = num_claimed;
@@ -518,8 +469,8 @@ static int optimal_reclaimed_pages(void)
        int ret;
 
        ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
-              sizeof(struct mlx5_manage_pages_outbox)) /
-              FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
+              MLX5_ST_SZ_BYTES(manage_pages_out)) /
+              MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
 
        return ret;
 }
@@ -594,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
        unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
        int prev_vfs_pages = dev->priv.vfs_pages;
 
+       /* In case of internal error we will free the pages manually later */
+       if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+               mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+               return 0;
+       }
+
        mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
                      dev->priv.name);
        while (dev->priv.vfs_pages) {
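
The conversions in this file set the template for the whole series: hand-rolled
mailbox structs and explicit cpu_to_be*()/be*_to_cpu() calls are replaced by
buffers sized from the auto-generated mlx5_ifc layouts, with mlx5_cmd_exec()
now returning an errno that already folds in the firmware status. For a
variable-length command such as MANAGE_PAGES, the inbox grows by one pas[]
slot per page. A minimal sketch of that pattern, using only the helpers
visible above (the wrapper function itself is illustrative, not part of the
patch):

    /* Sketch only: the shape of a variable-length command after the
     * conversion; give_pages() above is the authoritative version and
     * additionally unwinds the 4K pages on failure.
     */
    static int example_give_pages(struct mlx5_core_dev *dev, u16 func_id,
                                  u64 *addrs, int npages)
    {
            u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
            int inlen = MLX5_ST_SZ_BYTES(manage_pages_in) +
                        npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
            u32 *in;
            int err;
            int i;

            in = mlx5_vzalloc(inlen);  /* zeroed; may fall back to vmalloc */
            if (!in)
                    return -ENOMEM;

            for (i = 0; i < npages; i++)
                    MLX5_SET64(manage_pages_in, in, pas[i], addrs[i]);

            MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
            MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
            MLX5_SET(manage_pages_in, in, function_id, func_id);
            MLX5_SET(manage_pages_in, in, input_num_entries, npages);

            err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
            kvfree(in);
            return err;  /* firmware status already folded into err */
    }
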
index f2d3aee..bd830d8 100644 (file)
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
-struct mlx5_alloc_pd_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_alloc_pd_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  pdn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  pdn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
 {
-       struct mlx5_alloc_pd_mbox_in    in;
-       struct mlx5_alloc_pd_mbox_out   out;
+       u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {0};
        int err;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       *pdn = be32_to_cpu(out.pdn) & 0xffffff;
+       MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *pdn = MLX5_GET(alloc_pd_out, out, pd);
        return err;
 }
 EXPORT_SYMBOL(mlx5_core_alloc_pd);
 
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
 {
-       struct mlx5_dealloc_pd_mbox_in  in;
-       struct mlx5_dealloc_pd_mbox_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
-       in.pdn = cpu_to_be32(pdn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
+       u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {0};
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+       MLX5_SET(dealloc_pd_in, in, pd, pdn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_pd);
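
The PD helpers above are the smallest instance of the template. Note that the
explicit "& 0xffffff" masking disappears: the pd field in the alloc_pd_out
layout is 24 bits wide, and MLX5_GET() masks by field width. A caller-side
usage sketch (the surrounding variables are assumptions):

    u32 pdn;
    int err;

    err = mlx5_core_alloc_pd(dev, &pdn);  /* errno includes FW status */
    if (err)
            return err;

    /* ... pdn feeds MKey/QP creation ... */

    mlx5_core_dealloc_pd(dev, pdn);
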
index 752c081..34e7184 100644 (file)
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
                         int size_in, void *data_out, int size_out,
-                        u16 reg_num, int arg, int write)
+                        u16 reg_id, int arg, int write)
 {
-       struct mlx5_access_reg_mbox_in *in = NULL;
-       struct mlx5_access_reg_mbox_out *out = NULL;
+       int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+       int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
        int err = -ENOMEM;
+       u32 *out = NULL;
+       u32 *in = NULL;
+       void *data;
 
-       in = mlx5_vzalloc(sizeof(*in) + size_in);
-       if (!in)
-               return -ENOMEM;
-
-       out = mlx5_vzalloc(sizeof(*out) + size_out);
-       if (!out)
-               goto ex1;
-
-       memcpy(in->data, data_in, size_in);
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
-       in->hdr.opmod = cpu_to_be16(!write);
-       in->arg = cpu_to_be32(arg);
-       in->register_id = cpu_to_be16(reg_num);
-       err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
-                           sizeof(*out) + size_out);
-       if (err)
-               goto ex2;
+       in = mlx5_vzalloc(inlen);
+       out = mlx5_vzalloc(outlen);
+       if (!in || !out)
+               goto out;
 
-       if (out->hdr.status)
-               err = mlx5_cmd_status_to_err(&out->hdr);
+       data = MLX5_ADDR_OF(access_register_in, in, register_data);
+       memcpy(data, data_in, size_in);
 
-       if (!err)
-               memcpy(data_out, out->data, size_out);
+       MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+       MLX5_SET(access_register_in, in, op_mod, !write);
+       MLX5_SET(access_register_in, in, argument, arg);
+       MLX5_SET(access_register_in, in, register_id, reg_id);
+
+       err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+       if (err)
+               goto out;
+
+       data = MLX5_ADDR_OF(access_register_out, out, register_data);
+       memcpy(data_out, data, size_out);
 
-ex2:
+out:
        kvfree(out);
-ex1:
        kvfree(in);
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
 
-
 struct mlx5_reg_pcap {
        u8                      rsvd0;
        u8                      port_num;
@@ -104,12 +101,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port)
 {
-       u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+       u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(ptys_reg, in, local_port, local_port);
        MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
-
        return mlx5_core_access_reg(dev, in, sizeof(in), ptys,
                                    ptys_size, MLX5_REG_PTYS, 0, 0);
 }
@@ -117,13 +112,11 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ptys);
 
 int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
 {
+       u32 in[MLX5_ST_SZ_DW(mlcr_reg)]  = {0};
        u32 out[MLX5_ST_SZ_DW(mlcr_reg)];
-       u32 in[MLX5_ST_SZ_DW(mlcr_reg)];
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(mlcr_reg, in, local_port, 1);
        MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration);
-
        return mlx5_core_access_reg(dev, in, sizeof(in), out,
                                    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
@@ -182,25 +175,39 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
 
-int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
-                              u8 *proto_oper, int proto_mask,
-                              u8 local_port)
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+                                  u32 *proto_oper, u8 local_port)
 {
        u32 out[MLX5_ST_SZ_DW(ptys_reg)];
        int err;
 
-       err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port);
+       err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
+                                  local_port);
        if (err)
                return err;
 
-       if (proto_mask == MLX5_PTYS_EN)
-               *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-       else
-               *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+       *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+
+       return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
+
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+                                 u8 *proto_oper, u8 local_port)
+{
+       u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+       int err;
+
+       err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB,
+                                  local_port);
+       if (err)
+               return err;
+
+       *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper);
+EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
 
 int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
                       u32 proto_admin, int proto_mask)
@@ -246,15 +253,12 @@ EXPORT_SYMBOL_GPL(mlx5_toggle_port_link);
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
                               enum mlx5_port_status status)
 {
-       u32 in[MLX5_ST_SZ_DW(paos_reg)];
+       u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(paos_reg)];
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(paos_reg, in, local_port, 1);
        MLX5_SET(paos_reg, in, admin_status, status);
        MLX5_SET(paos_reg, in, ase, 1);
-
        return mlx5_core_access_reg(dev, in, sizeof(in), out,
                                    sizeof(out), MLX5_REG_PAOS, 0, 1);
 }
@@ -263,19 +267,15 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status);
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
                                 enum mlx5_port_status *status)
 {
-       u32 in[MLX5_ST_SZ_DW(paos_reg)];
+       u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(paos_reg)];
        int err;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(paos_reg, in, local_port, 1);
-
        err = mlx5_core_access_reg(dev, in, sizeof(in), out,
                                   sizeof(out), MLX5_REG_PAOS, 0, 0);
        if (err)
                return err;
-
        *status = MLX5_GET(paos_reg, out, admin_status);
        return 0;
 }
@@ -284,13 +284,10 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
 static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
                                u16 *max_mtu, u16 *oper_mtu, u8 port)
 {
-       u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+       u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(pmtu_reg, in, local_port, port);
-
        mlx5_core_access_reg(dev, in, sizeof(in), out,
                             sizeof(out), MLX5_REG_PMTU, 0, 0);
 
@@ -304,14 +301,11 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
 
 int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
 {
-       u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+       u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(pmtu_reg, in, admin_mtu, mtu);
        MLX5_SET(pmtu_reg, in, local_port, port);
-
        return mlx5_core_access_reg(dev, in, sizeof(in), out,
                                   sizeof(out), MLX5_REG_PMTU, 0, 1);
 }
@@ -333,15 +327,12 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu);
 
 static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
 {
+       u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
-       u32 in[MLX5_ST_SZ_DW(pmlp_reg)];
        int module_mapping;
        int err;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(pmlp_reg, in, local_port, 1);
-
        err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
                                   MLX5_REG_PMLP, 0, 0);
        if (err)
@@ -410,11 +401,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
 static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
                                int pvlc_size,  u8 local_port)
 {
-       u32 in[MLX5_ST_SZ_DW(pvlc_reg)];
+       u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pvlc_reg, in, local_port, local_port);
-
        return mlx5_core_access_reg(dev, in, sizeof(in), pvlc,
                                    pvlc_size, MLX5_REG_PVLC, 0, 0);
 }
@@ -460,10 +449,9 @@ EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt);
 
 int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
 {
-       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pfcc_reg, in, local_port, 1);
        MLX5_SET(pfcc_reg, in, pptx, tx_pause);
        MLX5_SET(pfcc_reg, in, pprx, rx_pause);
@@ -476,13 +464,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause);
 int mlx5_query_port_pause(struct mlx5_core_dev *dev,
                          u32 *rx_pause, u32 *tx_pause)
 {
-       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
        int err;
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pfcc_reg, in, local_port, 1);
-
        err = mlx5_core_access_reg(dev, in, sizeof(in), out,
                                   sizeof(out), MLX5_REG_PFCC, 0, 0);
        if (err)
@@ -500,10 +486,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_pause);
 
 int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
 {
-       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pfcc_reg, in, local_port, 1);
        MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
        MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
@@ -517,13 +502,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc);
 
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 {
-       u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+       u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
        int err;
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pfcc_reg, in, local_port, 1);
-
        err = mlx5_core_access_reg(dev, in, sizeof(in), out,
                                   sizeof(out), MLX5_REG_PFCC, 0, 0);
        if (err)
@@ -567,12 +550,11 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev)
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
-       u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+       u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
        u32 out[MLX5_ST_SZ_DW(qtct_reg)];
        int err;
        int i;
 
-       memset(in, 0, sizeof(in));
        for (i = 0; i < 8; i++) {
                if (prio_tc[i] > mlx5_max_tc(mdev))
                        return -EINVAL;
@@ -617,11 +599,9 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
 
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
 {
-       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
        int i;
 
-       memset(in, 0, sizeof(in));
-
        for (i = 0; i <= mlx5_max_tc(mdev); i++) {
                MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
                MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
@@ -633,11 +613,9 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
 
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 {
-       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
        int i;
 
-       memset(in, 0, sizeof(in));
-
        for (i = 0; i <= mlx5_max_tc(mdev); i++) {
                MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
                MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
@@ -651,12 +629,10 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
                                    u8 *max_bw_value,
                                    u8 *max_bw_units)
 {
-       u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+       u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
        void *ets_tcn_conf;
        int i;
 
-       memset(in, 0, sizeof(in));
-
        MLX5_SET(qetc_reg, in, port_number, 1);
 
        for (i = 0; i <= mlx5_max_tc(mdev); i++) {
@@ -701,35 +677,24 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
 
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
 {
-       u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)];
-       u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
 
        MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
        MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
        MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
-
-       return mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
-                                         out, sizeof(out));
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
 
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
 {
-       u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)];
-       u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)];
+       u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
        int err;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
-
-       err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
-                                        out, sizeof(out));
-
+       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
        if (!err)
                *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
 
@@ -740,11 +705,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
 static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
                                  int outlen)
 {
-       u32 in[MLX5_ST_SZ_DW(pcmr_reg)];
+       u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pcmr_reg, in, local_port, 1);
-
        return mlx5_core_access_reg(mdev, in, sizeof(in), out,
                                    outlen, MLX5_REG_PCMR, 0, 0);
 }
@@ -759,12 +722,10 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
 
 int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
 {
-       u32 in[MLX5_ST_SZ_DW(pcmr_reg)];
+       u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(pcmr_reg, in, local_port, 1);
        MLX5_SET(pcmr_reg, in, fcs_chk, enable);
-
        return mlx5_set_ports_check(mdev, in, sizeof(in));
 }
 
index b82d658..d0a4005 100644 (file)
@@ -271,30 +271,20 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
 
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
-                       struct mlx5_create_qp_mbox_in *in,
-                       int inlen)
+                       u32 *in, int inlen)
 {
-       struct mlx5_create_qp_mbox_out out;
-       struct mlx5_destroy_qp_mbox_in din;
-       struct mlx5_destroy_qp_mbox_out dout;
+       u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
+       u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
+       u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
        int err;
 
-       memset(&out, 0, sizeof(out));
-       in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+       MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
 
-       err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-       if (err) {
-               mlx5_core_warn(dev, "ret %d\n", err);
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+       if (err)
                return err;
-       }
-
-       if (out.hdr.status) {
-               mlx5_core_warn(dev, "current num of QPs 0x%x\n",
-                              atomic_read(&dev->num_qps));
-               return mlx5_cmd_status_to_err(&out.hdr);
-       }
 
-       qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
+       qp->qpn = MLX5_GET(create_qp_out, out, qpn);
        mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
        err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
@@ -311,12 +301,11 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
        return 0;
 
 err_cmd:
-       memset(&din, 0, sizeof(din));
-       memset(&dout, 0, sizeof(dout));
-       din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
-       din.qpn = cpu_to_be32(qp->qpn);
-       mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout));
-
+       memset(din, 0, sizeof(din));
+       memset(dout, 0, sizeof(dout));
+       MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+       mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
@@ -324,45 +313,145 @@ EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
                         struct mlx5_core_qp *qp)
 {
-       struct mlx5_destroy_qp_mbox_in in;
-       struct mlx5_destroy_qp_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(destroy_qp_in)]   = {0};
        int err;
 
        mlx5_debug_qp_remove(dev, qp);
 
        destroy_qprqsq_common(dev, qp);
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
-       in.qpn = cpu_to_be32(qp->qpn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+       MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (err)
                return err;
 
-       if (out.hdr.status)
-               return mlx5_cmd_status_to_err(&out.hdr);
-
        atomic_dec(&dev->num_qps);
        return 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
 
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
-                       struct mlx5_modify_qp_mbox_in *in, int sqd_event,
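+/* Scratch command mailboxes for the modify-QP family: each
+ * MLX5_CMD_OP_* transition has its own mlx5_ifc layout, so the in/out
+ * buffers are sized per opcode by modify_qp_mbox_alloc() below.
+ */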
+struct mbox_info {
+       u32 *in;
+       u32 *out;
+       int inlen;
+       int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+       mbox->inlen  = inlen;
+       mbox->outlen = outlen;
+       mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+       mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+       if (!mbox->in || !mbox->out) {
+               kfree(mbox->in);
+               kfree(mbox->out);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+       kfree(mbox->in);
+       kfree(mbox->out);
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+                               u32 opt_param_mask, void *qpc,
+                               struct mbox_info *mbox)
+{
+       mbox->out = NULL;
+       mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ)  \
+       mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
+       MLX5_SET(typ##_in, in, opcode, _opcode); \
+       MLX5_SET(typ##_in, in, qpn, _qpn)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
+       MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
+       MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+       memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+
+       switch (opcode) {
+       /* 2RST & 2ERR */
+       case MLX5_CMD_OP_2RST_QP:
+               if (MBOX_ALLOC(mbox, qp_2rst))
+                       return -ENOMEM;
+               MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+               break;
+       case MLX5_CMD_OP_2ERR_QP:
+               if (MBOX_ALLOC(mbox, qp_2err))
+                       return -ENOMEM;
+               MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+               break;
+
+       /* MODIFY with QPC */
+       case MLX5_CMD_OP_RST2INIT_QP:
+               if (MBOX_ALLOC(mbox, rst2init_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       case MLX5_CMD_OP_INIT2RTR_QP:
+               if (MBOX_ALLOC(mbox, init2rtr_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       case MLX5_CMD_OP_RTR2RTS_QP:
+               if (MBOX_ALLOC(mbox, rtr2rts_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       case MLX5_CMD_OP_RTS2RTS_QP:
+               if (MBOX_ALLOC(mbox, rts2rts_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       case MLX5_CMD_OP_SQERR2RTS_QP:
+               if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       case MLX5_CMD_OP_INIT2INIT_QP:
+               if (MBOX_ALLOC(mbox, init2init_qp))
+                       return -ENOMEM;
+               MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+                                 opt_param_mask, qpc);
+               break;
+       default:
+               mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
+                             opcode, qpn);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+                       u32 opt_param_mask, void *qpc,
                        struct mlx5_core_qp *qp)
 {
-       struct mlx5_modify_qp_mbox_out out;
-       int err = 0;
+       struct mbox_info mbox;
+       int err;
 
-       memset(&out, 0, sizeof(out));
-       in->hdr.opcode = cpu_to_be16(operation);
-       in->qpn = cpu_to_be32(qp->qpn);
-       err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
+       err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
+                                  opt_param_mask, qpc, &mbox);
        if (err)
                return err;
 
-       return mlx5_cmd_status_to_err(&out.hdr);
+       err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
+       mbox_free(&mbox);
+       return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
 
@@ -382,66 +471,38 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
 }
 
 int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-                      struct mlx5_query_qp_mbox_out *out, int outlen)
+                      u32 *out, int outlen)
 {
-       struct mlx5_query_qp_mbox_in in;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(out, 0, outlen);
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP);
-       in.qpn = cpu_to_be32(qp->qpn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-       if (err)
-               return err;
+       u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
 
-       if (out->hdr.status)
-               return mlx5_cmd_status_to_err(&out->hdr);
-
-       return err;
+       MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+       MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
 
 int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
 {
-       struct mlx5_alloc_xrcd_mbox_in in;
-       struct mlx5_alloc_xrcd_mbox_out out;
+       u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)]   = {0};
        int err;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-       else
-               *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff;
-
+       MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
        return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
 
 int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
 {
-       struct mlx5_dealloc_xrcd_mbox_in in;
-       struct mlx5_dealloc_xrcd_mbox_out out;
-       int err;
+       u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)]   = {0};
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD);
-       in.xrcdn = cpu_to_be32(xrcdn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+       MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
 
@@ -449,28 +510,23 @@ EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
 int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
                                u8 flags, int error)
 {
-       struct mlx5_page_fault_resume_mbox_in in;
-       struct mlx5_page_fault_resume_mbox_out out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
-       in.hdr.opmod = 0;
-       flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
-                 MLX5_PAGE_FAULT_RESUME_WRITE     |
-                 MLX5_PAGE_FAULT_RESUME_RDMA);
-       flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
-       in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
-                                  (flags << MLX5_QPN_BITS));
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               return err;
-
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-
-       return err;
+       u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};
+
+       MLX5_SET(page_fault_resume_in, in, opcode,
+                MLX5_CMD_OP_PAGE_FAULT_RESUME);
+       MLX5_SET(page_fault_resume_in, in, qpn, qpn);
+
+       if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR)
+               MLX5_SET(page_fault_resume_in, in, req_res, 1);
+       if (flags & MLX5_PAGE_FAULT_RESUME_WRITE)
+               MLX5_SET(page_fault_resume_in, in, read_write, 1);
+       if (flags & MLX5_PAGE_FAULT_RESUME_RDMA)
+               MLX5_SET(page_fault_resume_in, in, rdma, 1);
+       if (error)
+               MLX5_SET(page_fault_resume_in, in, error, 1);
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
 #endif
@@ -541,15 +597,12 @@ EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
 
 int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
 {
-       u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)];
-       u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)];
+       u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
        int err;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (!err)
                *counter_id = MLX5_GET(alloc_q_counter_out, out,
                                       counter_set_id);
@@ -559,31 +612,25 @@ EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter);
 
 int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
 {
-       u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)];
-       u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
 
        MLX5_SET(dealloc_q_counter_in, in, opcode,
                 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
        MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
 
 int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
                              int reset, void *out, int out_size)
 {
-       u32 in[MLX5_ST_SZ_DW(query_q_counter_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
 
        MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
        MLX5_SET(query_q_counter_in, in, clear, reset);
        MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
 
index c07c28b..104902a 100644 (file)
@@ -63,19 +63,14 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
                                   u32 rate, u16 index)
 {
-       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)];
-       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
 
        MLX5_SET(set_rate_limit_in, in, opcode,
                 MLX5_CMD_OP_SET_RATE_LIMIT);
        MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
        MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-                                         out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
index b380a6b..e086277 100644 (file)
 #include "eswitch.h"
 #endif
 
-static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+       return !!sriov->num_vfs;
+}
+
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
        int err;
        int vf;
 
-       for (vf = 1; vf <= num_vfs; vf++) {
-               err = mlx5_core_enable_hca(dev, vf);
+       if (sriov->enabled_vfs) {
+               mlx5_core_warn(dev,
+                              "failed to enable SRIOV on device, already enabled with %d vfs\n",
+                              sriov->enabled_vfs);
+               return -EBUSY;
+       }
+
+#ifdef CONFIG_MLX5_CORE_EN
+       err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
+       if (err) {
+               mlx5_core_warn(dev,
+                              "failed to enable eswitch SRIOV (%d)\n", err);
+               return err;
+       }
+#endif
+
+       for (vf = 0; vf < num_vfs; vf++) {
+               err = mlx5_core_enable_hca(dev, vf + 1);
                if (err) {
-                       mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
-               } else {
-                       sriov->vfs_ctx[vf - 1].enabled = 1;
-                       mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
+                       mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
+                       continue;
                }
+               sriov->vfs_ctx[vf].enabled = 1;
+               sriov->enabled_vfs++;
+               mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
+
        }
+
+       return 0;
 }
 
-static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+       int err;
        int vf;
 
-       for (vf = 1; vf <= num_vfs; vf++) {
-               if (sriov->vfs_ctx[vf - 1].enabled) {
-                       if (mlx5_core_disable_hca(dev, vf))
-                               mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
-                       else
-                               sriov->vfs_ctx[vf - 1].enabled = 0;
+       if (!sriov->enabled_vfs)
+               return;
+
+       for (vf = 0; vf < sriov->num_vfs; vf++) {
+               if (!sriov->vfs_ctx[vf].enabled)
+                       continue;
+               err = mlx5_core_disable_hca(dev, vf + 1);
+               if (err) {
+                       mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
+                       continue;
                }
+               sriov->vfs_ctx[vf].enabled = 0;
+               sriov->enabled_vfs--;
        }
+
+#ifdef CONFIG_MLX5_CORE_EN
+       mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+#endif
+
+       if (mlx5_wait_for_vf_pages(dev))
+               mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
 }
 
-static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
+static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
 {
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-       int err;
-
-       if (pci_num_vf(pdev))
-               pci_disable_sriov(pdev);
+       int err = 0;
 
-       enable_vfs(dev, num_vfs);
-
-       err = pci_enable_sriov(pdev, num_vfs);
-       if (err) {
-               dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
-               goto ex;
+       if (pci_num_vf(pdev)) {
+               mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
+               return -EBUSY;
        }
 
-       return 0;
+       err = pci_enable_sriov(pdev, num_vfs);
+       if (err)
+               mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
 
-ex:
-       disable_vfs(dev, num_vfs);
        return err;
 }
 
-static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
+static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
+{
+       pci_disable_sriov(pdev);
+}
+
+static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 {
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-       int err;
+       int err = 0;
 
-       kfree(sriov->vfs_ctx);
-       sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC);
-       if (!sriov->vfs_ctx)
-               return -ENOMEM;
+       err = mlx5_device_enable_sriov(dev, num_vfs);
+       if (err) {
+               mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
+               return err;
+       }
 
-       sriov->enabled_vfs = num_vfs;
-       err = mlx5_core_create_vfs(pdev, num_vfs);
+       err = mlx5_pci_enable_sriov(pdev, num_vfs);
        if (err) {
-               kfree(sriov->vfs_ctx);
-               sriov->vfs_ctx = NULL;
+               mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
+               mlx5_device_disable_sriov(dev);
                return err;
        }
 
+       sriov->num_vfs = num_vfs;
+
        return 0;
 }
 
-static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
+static void mlx5_sriov_disable(struct pci_dev *pdev)
 {
+       struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 
-       sriov->num_vfs = num_vfs;
-}
-
-static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
-{
-       struct mlx5_core_sriov *sriov;
-
-       sriov = &dev->priv.sriov;
-       disable_vfs(dev, sriov->num_vfs);
-
-       if (mlx5_wait_for_vf_pages(dev))
-               mlx5_core_warn(dev, "timeout claiming VFs pages\n");
-
+       mlx5_pci_disable_sriov(pdev);
+       mlx5_device_disable_sriov(dev);
        sriov->num_vfs = 0;
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-       struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-       int err;
+       int err = 0;
 
        mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
        if (!mlx5_core_is_pf(dev))
                return -EPERM;
 
-       mlx5_core_cleanup_vfs(dev);
-
-       if (!num_vfs) {
-#ifdef CONFIG_MLX5_CORE_EN
-               mlx5_eswitch_disable_sriov(dev->priv.eswitch);
-#endif
-               kfree(sriov->vfs_ctx);
-               sriov->vfs_ctx = NULL;
-               if (!pci_vfs_assigned(pdev))
-                       pci_disable_sriov(pdev);
-               else
-                       pr_info("unloading PF driver while leaving orphan VFs\n");
-               return 0;
+       if (num_vfs && mlx5_lag_is_active(dev)) {
+               mlx5_core_warn(dev, "can't turn sriov on while LAG is active");
+               return -EINVAL;
        }
 
-       err = mlx5_core_sriov_enable(pdev, num_vfs);
-       if (err) {
-               dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err);
-               return err;
-       }
+       if (num_vfs)
+               err = mlx5_sriov_enable(pdev, num_vfs);
+       else
+               mlx5_sriov_disable(pdev);
 
-       mlx5_core_init_vfs(dev, num_vfs);
-#ifdef CONFIG_MLX5_CORE_EN
-       mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
-#endif
-
-       return num_vfs;
+       return err ? err : num_vfs;
 }
 
-static int sync_required(struct pci_dev *pdev)
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
 {
-       struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-       int cur_vfs = pci_num_vf(pdev);
 
-       if (cur_vfs != sriov->num_vfs) {
-               pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs);
-               return 1;
-       }
+       if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+               return 0;
 
-       return 0;
+       /* If SRIOV VFs exist at the PCI level, enable them at the device level */
+       return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+       if (!mlx5_core_is_pf(dev))
+               return;
+
+       mlx5_device_disable_sriov(dev);
 }
 
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
        struct pci_dev *pdev = dev->pdev;
-       int cur_vfs;
+       int total_vfs;
 
        if (!mlx5_core_is_pf(dev))
                return 0;
 
-       if (!sync_required(dev->pdev))
-               return 0;
-
-       cur_vfs = pci_num_vf(pdev);
-       sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+       total_vfs = pci_sriov_get_totalvfs(pdev);
+       sriov->num_vfs = pci_num_vf(pdev);
+       sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
        if (!sriov->vfs_ctx)
                return -ENOMEM;
 
-       sriov->enabled_vfs = cur_vfs;
-
-       mlx5_core_init_vfs(dev, cur_vfs);
-#ifdef CONFIG_MLX5_CORE_EN
-       if (cur_vfs)
-               mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs,
-                                         SRIOV_LEGACY);
-#endif
-
-       enable_vfs(dev, cur_vfs);
-
        return 0;
 }
 
-int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
 {
-       struct pci_dev *pdev = dev->pdev;
-       int err;
+       struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 
        if (!mlx5_core_is_pf(dev))
-               return 0;
+               return;
 
-       err = mlx5_core_sriov_configure(pdev, 0);
-       if (err)
-               return err;
-
-       return 0;
+       kfree(sriov->vfs_ctx);
 }
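
The SRIOV rework separates PCI-level from device-level state so that a driver
reload can reattach VFs that PCI still reports as enabled. A sketch of how the
new hooks compose on a load/unload path (the call sites are assumptions; they
are not shown in these hunks):

    err = mlx5_sriov_init(dev);    /* vfs_ctx sized for totalvfs,
                                    * num_vfs seeded from pci_num_vf() */
    if (err)
            return err;

    err = mlx5_sriov_attach(dev);  /* no-op unless VFs already exist */
    if (err) {
            mlx5_sriov_cleanup(dev);
            return err;
    }

    /* ... teardown runs the mirror image ... */
    mlx5_sriov_detach(dev);        /* device side only; PCI SRIOV kept */
    mlx5_sriov_cleanup(dev);
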
index c07f4d0..3099630 100644 (file)
@@ -175,8 +175,8 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(create_srq_in, create_in, opcode,
                 MLX5_CMD_OP_CREATE_SRQ);
 
-       err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
-                                        sizeof(create_out));
+       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+                           sizeof(create_out));
        kvfree(create_in);
        if (!err)
                srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
@@ -194,8 +194,8 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
                 MLX5_CMD_OP_DESTROY_SRQ);
        MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
 
-       return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-                                         srq_out, sizeof(srq_out));
+       return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+                            srq_out, sizeof(srq_out));
 }
 
 static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
@@ -209,8 +209,8 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
        MLX5_SET(arm_xrc_srq_in, srq_in, lwm,      lwm);
 
-       return  mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-                                          srq_out, sizeof(srq_out));
+       return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+                            srq_out, sizeof(srq_out));
 }
 
 static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
@@ -228,9 +228,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
        MLX5_SET(query_srq_in, srq_in, opcode,
                 MLX5_CMD_OP_QUERY_SRQ);
        MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
-       err =  mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-                                         srq_out,
-                                         MLX5_ST_SZ_BYTES(query_srq_out));
+       err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+                           srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
        if (err)
                goto out;
 
@@ -272,8 +271,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
                 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
        memset(create_out, 0, sizeof(create_out));
-       err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
-                                        sizeof(create_out));
+       err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+                           sizeof(create_out));
        if (err)
                goto out;
 
@@ -286,36 +285,30 @@ out:
 static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
                               struct mlx5_core_srq *srq)
 {
-       u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)];
-       u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)];
-
-       memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-       memset(xrcsrq_out, 0, sizeof(xrcsrq_out));
+       u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
+       u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
 
        MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
                 MLX5_CMD_OP_DESTROY_XRC_SRQ);
        MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
 
-       return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-                                         xrcsrq_out, sizeof(xrcsrq_out));
+       return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+                            xrcsrq_out, sizeof(xrcsrq_out));
 }
 
 static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
                           struct mlx5_core_srq *srq, u16 lwm)
 {
-       u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)];
-       u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)];
-
-       memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-       memset(xrcsrq_out, 0, sizeof(xrcsrq_out));
+       u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
+       u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
 
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
        MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
 
-       return  mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-                                          xrcsrq_out, sizeof(xrcsrq_out));
+       return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+                            xrcsrq_out, sizeof(xrcsrq_out));
 }
 
 static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
@@ -335,9 +328,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
        MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
                 MLX5_CMD_OP_QUERY_XRC_SRQ);
        MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-       err =  mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-                                         xrcsrq_out,
-                                         MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+
+       err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
+                           MLX5_ST_SZ_BYTES(query_xrc_srq_out));
        if (err)
                goto out;
 
index 28274a6..a00ff49 100644 (file)
 
 int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
-       u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
-       u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
+       u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
        int err;
 
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
-
        MLX5_SET(alloc_transport_domain_in, in, opcode,
                 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
 
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (!err)
                *tdn = MLX5_GET(alloc_transport_domain_out, out,
                                transport_domain);
@@ -57,29 +54,23 @@ EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
 
 void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
-       u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
-       u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
 
        MLX5_SET(dealloc_transport_domain_in, in, opcode,
                 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
        MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_rq_out)];
+       u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
        int err;
 
        MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *rqn = MLX5_GET(create_rq_out, out, rqn);
 
@@ -95,21 +86,18 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
        MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
 
        memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_rq_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_rq_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
 
        MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
        MLX5_SET(destroy_rq_in, in, rqn, rqn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq);
 
@@ -121,19 +109,17 @@ int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
        MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
        MLX5_SET(query_rq_in, in, rqn, rqn);
 
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_rq);
 
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_sq_out)];
+       u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
        int err;
 
        MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *sqn = MLX5_GET(create_sq_out, out, sqn);
 
@@ -142,27 +128,22 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 
 int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_sq_out)];
+       u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
 
        MLX5_SET(modify_sq_in, in, sqn, sqn);
        MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
-
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_sq_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_sq_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
 
        MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
        MLX5_SET(destroy_sq_in, in, sqn, sqn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
@@ -172,21 +153,20 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
 
        MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
        MLX5_SET(query_sq_in, in, sqn, sqn);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_sq);
 
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tirn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_tir_out)];
+       u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0};
        int err;
 
        MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
 
        memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *tirn = MLX5_GET(create_tir_out, out, tirn);
 
@@ -197,39 +177,32 @@ EXPORT_SYMBOL(mlx5_core_create_tir);
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
                         int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_tir_out)];
+       u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
 
        MLX5_SET(modify_tir_in, in, tirn, tirn);
        MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
-
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_tir_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_tir_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
 
        MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
        MLX5_SET(destroy_tir_in, in, tirn, tirn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
 int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *tisn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_tis_out)];
+       u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
        int err;
 
        MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *tisn = MLX5_GET(create_tis_out, out, tisn);
 
@@ -245,34 +218,29 @@ int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
        MLX5_SET(modify_tis_in, in, tisn, tisn);
        MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
 
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_tis_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_tis_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
 
        MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
        MLX5_SET(destroy_tis_in, in, tisn, tisn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
 int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rmpn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_rmp_out)];
+       u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
        int err;
 
        MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *rmpn = MLX5_GET(create_rmp_out, out, rmpn);
 
@@ -281,38 +249,31 @@ int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
 int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_rmp_out)];
+       u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
 
        MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
-
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
 
        MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
        MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+       return mlx5_cmd_exec(dev, in, sizeof(in), out,
                                          sizeof(out));
 }
 
 int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
 {
-       u32 in[MLX5_ST_SZ_DW(query_rmp_in)];
+       u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
        int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
        MLX5_SET(query_rmp_in, in, rmpn,   rmpn);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
@@ -347,13 +308,11 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
 int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
                          u32 *xsrqn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+       u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
        int err;
 
        MLX5_SET(create_xrc_srq_in, in, opcode,     MLX5_CMD_OP_CREATE_XRC_SRQ);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
 
@@ -362,33 +321,25 @@ int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 
 int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
 
        MLX5_SET(destroy_xrc_srq_in, in, opcode,   MLX5_CMD_OP_DESTROY_XRC_SRQ);
        MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
-
-       return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out)
 {
-       u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+       u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0};
        void *srqc;
        void *xrc_srqc;
        int err;
 
-       memset(in, 0, sizeof(in));
        MLX5_SET(query_xrc_srq_in, in, opcode,   MLX5_CMD_OP_QUERY_XRC_SRQ);
        MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn);
-
-       err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-                                         out,
-                                         MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out,
+                           MLX5_ST_SZ_BYTES(query_xrc_srq_out));
        if (!err) {
                xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out,
                                        xrc_srq_context_entry);
@@ -401,32 +352,25 @@ int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out)
 
 int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
 {
-       u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)];
-       u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
 
        MLX5_SET(arm_xrc_srq_in, in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
        MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
        MLX5_SET(arm_xrc_srq_in, in, lwm,      lwm);
        MLX5_SET(arm_xrc_srq_in, in, op_mod,
                 MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-
-       return  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-                                          sizeof(out));
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
                         u32 *rqtn)
 {
-       u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
+       u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
        int err;
 
        MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
-
-       memset(out, 0, sizeof(out));
-       err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (!err)
                *rqtn = MLX5_GET(create_rqt_out, out, rqtn);
 
@@ -437,25 +381,20 @@ EXPORT_SYMBOL(mlx5_core_create_rqt);
 int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
                         int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_rqt_out)];
+       u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
 
        MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
        MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
-
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
 {
-       u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
-       u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
 
        MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
        MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
-
-       mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rqt);
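
The same mechanical transform covers every RQ/SQ/TIR/TIS/RMP/XRC-SRQ/RQT
helper above. Two sizing macros are in play: MLX5_ST_SZ_DW() gives a
command layout's size in 32-bit words (used to dimension the u32
mailbox arrays) and MLX5_ST_SZ_BYTES() gives it in bytes (used for the
length arguments), so for any layout the byte size is four times the
dword size. A compile-time check such as this (it would have to live
inside a function) is one way to state the relation:

	BUILD_BUG_ON(MLX5_ST_SZ_BYTES(create_rqt_out) !=
		     MLX5_ST_SZ_DW(create_rqt_out) * 4);
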
index 5ff8af4..ab0b896 100644
@@ -42,73 +42,28 @@ enum {
        NUM_LOW_LAT_UUARS       = 4,
 };
 
-
-struct mlx5_alloc_uar_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_alloc_uar_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  uarn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  uarn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-       struct mlx5_alloc_uar_mbox_in   in;
-       struct mlx5_alloc_uar_mbox_out  out;
+       u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(alloc_uar_in)]   = {0};
        int err;
 
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               goto ex;
-
-       if (out.hdr.status) {
-               err = mlx5_cmd_status_to_err(&out.hdr);
-               goto ex;
-       }
-
-       *uarn = be32_to_cpu(out.uarn) & 0xffffff;
-
-ex:
+       MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *uarn = MLX5_GET(alloc_uar_out, out, uar);
        return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
-       struct mlx5_free_uar_mbox_in    in;
-       struct mlx5_free_uar_mbox_out   out;
-       int err;
-
-       memset(&in, 0, sizeof(in));
-       memset(&out, 0, sizeof(out));
-       in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR);
-       in.uarn = cpu_to_be32(uarn);
-       err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-       if (err)
-               goto ex;
+       u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)]   = {0};
 
-       if (out.hdr.status)
-               err = mlx5_cmd_status_to_err(&out.hdr);
-
-ex:
-       return err;
+       MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+       MLX5_SET(dealloc_uar_in, in, uar, uarn);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_cmd_free_uar);
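
The UAR hunk goes one step further than the rest: the hand-rolled
mlx5_alloc_uar_mbox_* / mlx5_free_uar_mbox_* structures, with their
explicit cpu_to_be16()/be32_to_cpu() conversions and manual 0xffffff
masking, are deleted in favour of the generated command layouts.
MLX5_SET()/MLX5_GET() derive byte order, offset and field width from
the layout definition, so extracting the 24-bit UAR index shrinks to a
single accessor:

	/* old, paraphrased: *uarn = be32_to_cpu(out.uarn) & 0xffffff;
	 * new, width and endianness come from the alloc_uar_out layout: */
	*uarn = MLX5_GET(alloc_uar_out, out, uar);
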
 
index 21365d0..525f17a 100644
 static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
                                   u16 vport, u32 *out, int outlen)
 {
-       int err;
-       u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
 
        MLX5_SET(query_vport_state_in, in, opcode,
                 MLX5_CMD_OP_QUERY_VPORT_STATE);
@@ -51,11 +48,7 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
        if (vport)
                MLX5_SET(query_vport_state_in, in, other_vport, 1);
 
-       err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
-       if (err)
-               mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n");
-
-       return err;
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
@@ -81,58 +74,43 @@ EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 state)
 {
-       u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)];
-       u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)];
-       int err;
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
 
        MLX5_SET(modify_vport_state_in, in, opcode,
                 MLX5_CMD_OP_MODIFY_VPORT_STATE);
        MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
        MLX5_SET(modify_vport_state_in, in, vport_number, vport);
-
        if (vport)
                MLX5_SET(modify_vport_state_in, in, other_vport, 1);
-
        MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
-       err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
-                                        sizeof(out));
-       if (err)
-               mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n");
-
-       return err;
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
                                        u32 *out, int outlen)
 {
-       u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
-
-       memset(in, 0, sizeof(in));
+       u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
 
        MLX5_SET(query_nic_vport_context_in, in, opcode,
                 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
-
        MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
        if (vport)
                MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-       return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
 static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
                                         int inlen)
 {
-       u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+       u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
 
        MLX5_SET(modify_nic_vport_context_in, in, opcode,
                 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
-
-       memset(out, 0, sizeof(out));
-       return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
+       return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
@@ -147,6 +125,26 @@ void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+                                    u16 vport, u8 min_inline)
+{
+       u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+       int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+       void *nic_vport_ctx;
+
+       MLX5_SET(modify_nic_vport_context_in, in,
+                field_select.min_inline, 1);
+       MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+       MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+       nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+                                    in, nic_vport_context);
+       MLX5_SET(nic_vport_context, nic_vport_ctx,
+                min_wqe_inline_mode, min_inline);
+
+       return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
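
mlx5_modify_nic_vport_min_inline() is new in this hunk: it sets
field_select.min_inline so the firmware applies only the
min_wqe_inline_mode field, then goes through the shared modify-context
helper. A hypothetical caller (the vport number and the
MLX5_INLINE_MODE_L2 constant are illustrative, not taken from this
diff):

	/* e.g. require VF vport 1 to inline at least the L2 headers */
	err = mlx5_modify_nic_vport_min_inline(mdev, 1, MLX5_INLINE_MODE_L2);
	if (err)
		mlx5_core_warn(mdev, "failed to set min inline mode\n");
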
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr)
 {
@@ -254,7 +252,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
                                  u8 addr_list[][ETH_ALEN],
                                  int *list_size)
 {
-       u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+       u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
        void *nic_vport_ctx;
        int max_list_size;
        int req_list_size;
@@ -278,7 +276,6 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
        out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
                        req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
-       memset(in, 0, sizeof(in));
        out = kzalloc(out_sz, GFP_KERNEL);
        if (!out)
                return -ENOMEM;
@@ -291,7 +288,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
        if (vport)
                MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
        if (err)
                goto out;
 
@@ -361,7 +358,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
                ether_addr_copy(curr_mac, addr_list[i]);
        }
 
-       err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
        kfree(in);
        return err;
 }
@@ -406,7 +403,7 @@ int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
        if (vport)
                MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-       err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
        if (err)
                goto out;
 
@@ -473,7 +470,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
                MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
        }
 
-       err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+       err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
        kfree(in);
        return err;
 }
@@ -631,10 +628,6 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
        if (err)
                goto out;
 
-       err = mlx5_cmd_status_to_err_v2(out);
-       if (err)
-               goto out;
-
        tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
        gid->global.subnet_prefix = tmp->global.subnet_prefix;
        gid->global.interface_id = tmp->global.interface_id;
@@ -700,10 +693,6 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport,
        if (err)
                goto out;
 
-       err = mlx5_cmd_status_to_err_v2(out);
-       if (err)
-               goto out;
-
        pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey);
        for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey))
                *pkey = MLX5_GET_PR(pkey, pkarr, pkey);
@@ -721,7 +710,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
                                 struct mlx5_hca_vport_context *rep)
 {
        int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
-       int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)];
+       int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
        int is_group_manager;
        void *out;
        void *ctx;
@@ -729,7 +718,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 
        is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
 
-       memset(in, 0, sizeof(in));
        out = kzalloc(out_sz, GFP_KERNEL);
        if (!out)
                return -ENOMEM;
@@ -750,9 +738,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
                MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
 
        err = mlx5_cmd_exec(dev, in, sizeof(in), out,  out_sz);
-       if (err)
-               goto ex;
-       err = mlx5_cmd_status_to_err_v2(out);
        if (err)
                goto ex;
 
@@ -969,10 +954,6 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
                MLX5_SET(query_vport_counter_in, in, port_num, port_num);
 
        err = mlx5_cmd_exec(dev, in, in_sz, out,  out_sz);
-       if (err)
-               goto free;
-       err = mlx5_cmd_status_to_err_v2(out);
-
 free:
        kvfree(in);
        return err;
@@ -1035,11 +1016,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
        MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
        MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
        err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-       if (err)
-               goto ex;
-
-       err = mlx5_cmd_status_to_err_v2(out);
-
 ex:
        kfree(in);
        return err;
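
The vport helpers shed their explicit mlx5_cmd_status_to_err_v2(out)
calls for the same reason as earlier: mlx5_cmd_exec() performs that
translation itself now, so the extra goto ladders collapse and the
per-command warning messages give way to the shared command
machinery's reporting. The remaining error path is simply:

	err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz);
	if (err)	/* already reflects a bad command status */
		goto free;
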
index e25a73e..07a9ba6 100644
@@ -46,41 +46,24 @@ void mlx5e_vxlan_init(struct mlx5e_priv *priv)
 
 static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-       struct mlx5_outbox_hdr *hdr;
-       int err;
-
-       u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)];
-       u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
 
        MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
                 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
        MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-
-       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-       if (err)
-               return err;
-
-       hdr = (struct mlx5_outbox_hdr *)out;
-       return hdr->status ? -ENOMEM : 0;
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 
 static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-       u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
-       u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
-
-       memset(in, 0, sizeof(in));
-       memset(out, 0, sizeof(out));
+       u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
 
        MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
                 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
        MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-
-       return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
-                                         sizeof(out));
+       return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
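
The VXLAN add-port path also loses its hand parsing of
mlx5_outbox_hdr, which used to flatten any non-zero firmware status
into -ENOMEM and thereby misreport the failure. With the unified
mlx5_cmd_exec() the caller observes the real error (the label below is
illustrative):

	err = mlx5e_vxlan_core_add_port_cmd(mdev, port);
	if (err)	/* a meaningful errno, not a blanket -ENOMEM */
		goto err_free;
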
 
 struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
index d3476ea..d2e3297 100644
@@ -87,6 +87,7 @@ struct mlxsw_rx_listener {
        void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
        u8 local_port;
        u16 trap_id;
+       enum mlxsw_reg_hpkt_action action;
 };
 
 struct mlxsw_event_listener {
index f33b997..af371a8 100644
@@ -56,6 +56,7 @@
 #define MLXSW_PORT_PHY_BITS_MASK       (MLXSW_PORT_MAX_PHY_PORTS - 1)
 
 #define MLXSW_PORT_CPU_PORT            0x0
+#define MLXSW_PORT_ROUTER_PORT         (MLXSW_PORT_MAX_PHY_PORTS + 2)
 
 #define MLXSW_PORT_DONT_CARE           (MLXSW_PORT_MAX_PORTS)
 
index 1721098..4e2354c 100644
@@ -591,6 +591,12 @@ static const struct mlxsw_reg_info mlxsw_reg_sfn = {
  */
 MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8);
 
+/* reg_sfn_end
+ * Forces the current session to end.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1);
+
 /* reg_sfn_num_rec
  * Request: Number of learned notifications and aged-out notification
  * records requested.
@@ -605,6 +611,7 @@ static inline void mlxsw_reg_sfn_pack(char *payload)
 {
        MLXSW_REG_ZERO(sfn, payload);
        mlxsw_reg_sfn_swid_set(payload, 0);
+       mlxsw_reg_sfn_end_set(payload, 1);
        mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT);
 }
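
Over in the mlxsw register definitions, MLXSW_ITEM32(reg, sfn, end,
0x04, 20, 1) declares a one-bit field at byte offset 0x04, bit 20, and
the item machinery emits mlxsw_reg_sfn_end_set()/..._get() accessors
for it; mlxsw_reg_sfn_pack() then sets the bit so every SFN query also
ends the notification session. A simplified sketch of what the
generated setter does (the real helper additionally handles the
big-endian payload words):

	static inline void mlxsw_reg_sfn_end_set(char *payload, u32 val)
	{
		u32 *p = (u32 *)(payload + 0x04);

		*p = (*p & ~(1U << 20)) | ((val & 1U) << 20);
	}
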
 
@@ -2131,6 +2138,18 @@ MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3);
 
+enum {
+       MLXSW_REG_PTYS_AN_STATUS_NA,
+       MLXSW_REG_PTYS_AN_STATUS_OK,
+       MLXSW_REG_PTYS_AN_STATUS_FAIL,
+};
+
+/* reg_ptys_an_status
+ * Autonegotiation status.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
+
 #define MLXSW_REG_PTYS_ETH_SPEED_SGMII                 BIT(0)
 #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX           BIT(1)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4           BIT(2)
@@ -2145,6 +2164,7 @@ MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3);
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR         BIT(14)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4           BIT(15)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4       BIT(16)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2           BIT(18)
 #define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4           BIT(19)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4          BIT(20)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4          BIT(21)
@@ -2177,6 +2197,13 @@ MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32);
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
 
+/* reg_ptys_eth_proto_lp_advertise
+ * The protocols that were advertised by the link partner during
+ * autonegotiation.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
+
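
The PTYS register grows three read-only additions that the ethtool
rework below relies on: an autonegotiation status code (an_status),
the 50GBASE_SR2 capability bit, and eth_proto_lp_advertise, the
protocol mask the link partner advertised. The generated getters are
consumed under a status guard, roughly:

	/* only trust the partner's mask once negotiation completed */
	if (mlxsw_reg_ptys_an_status_get(ptys_pl) ==
	    MLXSW_REG_PTYS_AN_STATUS_OK)
		lp_proto = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl);
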
 static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
                                       u32 proto_admin)
 {
index 1f81689..27bbcaf 100644
@@ -56,6 +56,7 @@
 #include <generated/utsrelease.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/netevent.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -555,8 +556,9 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port,
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
 }
 
-static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
-                                         u16 vid, bool learn_enable)
+int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                    u16 vid_begin, u16 vid_end,
+                                    bool learn_enable)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char *spvmlr_pl;
@@ -565,13 +567,20 @@ static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
        spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL);
        if (!spvmlr_pl)
                return -ENOMEM;
-       mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid,
-                             learn_enable);
+       mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin,
+                             vid_end, learn_enable);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl);
        kfree(spvmlr_pl);
        return err;
 }
 
+static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                         u16 vid, bool learn_enable)
+{
+       return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+                                               learn_enable);
+}
+
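
__mlxsw_sp_port_vid_learning_set() exposes the SPVMLR register's
native {vid_begin, vid_end} range so a caller can toggle learning for
a whole VLAN span with a single register write; the old per-VID
function becomes a thin wrapper over it. A hypothetical range call
(values illustrative):

	/* disable learning for VIDs 100-200 in one SPVMLR write */
	err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, 100, 200,
					       false);
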
 static int
 mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
 {
@@ -973,10 +982,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev,
                        goto err_port_vp_mode_trans;
        }
 
-       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
-       if (err)
-               goto err_port_vid_learning_set;
-
        err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
        if (err)
                goto err_port_add_vid;
@@ -984,8 +989,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev,
        return 0;
 
 err_port_add_vid:
-       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-err_port_vid_learning_set:
        if (list_is_singular(&mlxsw_sp_port->vports_list))
                mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 err_port_vp_mode_trans:
@@ -1012,8 +1015,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 
        mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 
-       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-
        /* Drop FID reference. If this was the last reference the
         * resources will be freed.
         */
@@ -1598,112 +1599,149 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset)
 }
 
 struct mlxsw_sp_port_link_mode {
+       enum ethtool_link_mode_bit_indices mask_ethtool;
        u32 mask;
-       u32 supported;
-       u32 advertised;
        u32 speed;
 };
 
 static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = {
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
-               .supported      = SUPPORTED_100baseT_Full,
-               .advertised     = ADVERTISED_100baseT_Full,
-               .speed          = 100,
-       },
-       {
-               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX,
-               .speed          = 100,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+               .speed          = SPEED_100,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_SGMII |
                                  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
-               .supported      = SUPPORTED_1000baseKX_Full,
-               .advertised     = ADVERTISED_1000baseKX_Full,
-               .speed          = 1000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+               .speed          = SPEED_1000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
-               .supported      = SUPPORTED_10000baseT_Full,
-               .advertised     = ADVERTISED_10000baseT_Full,
-               .speed          = 10000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+               .speed          = SPEED_10000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
-               .supported      = SUPPORTED_10000baseKX4_Full,
-               .advertised     = ADVERTISED_10000baseKX4_Full,
-               .speed          = 10000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+               .speed          = SPEED_10000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
                                  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR,
-               .supported      = SUPPORTED_10000baseKR_Full,
-               .advertised     = ADVERTISED_10000baseKR_Full,
-               .speed          = 10000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+               .speed          = SPEED_10000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
-               .supported      = SUPPORTED_20000baseKR2_Full,
-               .advertised     = ADVERTISED_20000baseKR2_Full,
-               .speed          = 20000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
+               .speed          = SPEED_20000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
-               .supported      = SUPPORTED_40000baseCR4_Full,
-               .advertised     = ADVERTISED_40000baseCR4_Full,
-               .speed          = 40000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+               .speed          = SPEED_40000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4,
-               .supported      = SUPPORTED_40000baseKR4_Full,
-               .advertised     = ADVERTISED_40000baseKR4_Full,
-               .speed          = 40000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+               .speed          = SPEED_40000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4,
-               .supported      = SUPPORTED_40000baseSR4_Full,
-               .advertised     = ADVERTISED_40000baseSR4_Full,
-               .speed          = 40000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+               .speed          = SPEED_40000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4,
-               .supported      = SUPPORTED_40000baseLR4_Full,
-               .advertised     = ADVERTISED_40000baseLR4_Full,
-               .speed          = 40000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+               .speed          = SPEED_40000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+               .speed          = SPEED_25000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+               .speed          = SPEED_25000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+               .speed          = SPEED_25000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+               .speed          = SPEED_25000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+               .speed          = SPEED_50000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+               .speed          = SPEED_50000,
        },
        {
-               .mask           = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR |
-                                 MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR |
-                                 MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
-               .speed          = 25000,
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+               .speed          = SPEED_50000,
        },
        {
-               .mask           = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
-               .speed          = 50000,
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
+               .speed          = SPEED_56000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
+               .speed          = SPEED_56000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
+               .speed          = SPEED_56000,
        },
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-               .supported      = SUPPORTED_56000baseKR4_Full,
-               .advertised     = ADVERTISED_56000baseKR4_Full,
-               .speed          = 56000,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
+               .speed          = SPEED_56000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+               .speed          = SPEED_100000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+               .speed          = SPEED_100000,
+       },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+               .speed          = SPEED_100000,
        },
        {
-               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
-               .speed          = 100000,
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+               .speed          = SPEED_100000,
        },
 };
 
 #define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode)
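
The rebuilt table maps each PTYS capability bit to exactly one
enum ethtool_link_mode_bit_indices value instead of the legacy 32-bit
SUPPORTED_*/ADVERTISED_* masks, which had run out of bits for the
25/50/100G modes; rows that used to OR several PTYS bits together are
split so every entry is a 1:1 translation (56GBASE_R4 legitimately
recurs once per ethtool variant of that mask). The ksettings API works
on long bitmaps rather than u32 masks, e.g.:

	/* declare and fill a supported-modes bitmap */
	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {0};

	__set_bit(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, supported);
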
 
-static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto)
+static void
+mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto,
+                                 struct ethtool_link_ksettings *cmd)
 {
        if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
                              MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
@@ -1711,43 +1749,29 @@ static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto)
                              MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
                              MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
                              MLXSW_REG_PTYS_ETH_SPEED_SGMII))
-               return SUPPORTED_FIBRE;
+               ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
 
        if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
                              MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
                              MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
                              MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
                              MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX))
-               return SUPPORTED_Backplane;
-       return 0;
+               ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
 }
 
-static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto)
+static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode)
 {
-       u32 modes = 0;
        int i;
 
        for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
                if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
-                       modes |= mlxsw_sp_port_link_mode[i].supported;
+                       __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+                                 mode);
        }
-       return modes;
-}
-
-static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto)
-{
-       u32 modes = 0;
-       int i;
-
-       for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-               if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
-                       modes |= mlxsw_sp_port_link_mode[i].advertised;
-       }
-       return modes;
 }
 
 static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
-                                           struct ethtool_cmd *cmd)
+                                           struct ethtool_link_ksettings *cmd)
 {
        u32 speed = SPEED_UNKNOWN;
        u8 duplex = DUPLEX_UNKNOWN;
@@ -1764,8 +1788,8 @@ static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
                }
        }
 out:
-       ethtool_cmd_speed_set(cmd, speed);
-       cmd->duplex = duplex;
+       cmd->base.speed = speed;
+       cmd->base.duplex = duplex;
 }
 
 static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto)
@@ -1790,49 +1814,15 @@ static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto)
        return PORT_OTHER;
 }
 
-static int mlxsw_sp_port_get_settings(struct net_device *dev,
-                                     struct ethtool_cmd *cmd)
-{
-       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       char ptys_pl[MLXSW_REG_PTYS_LEN];
-       u32 eth_proto_cap;
-       u32 eth_proto_admin;
-       u32 eth_proto_oper;
-       int err;
-
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
-       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-       if (err) {
-               netdev_err(dev, "Failed to get proto");
-               return err;
-       }
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap,
-                             &eth_proto_admin, &eth_proto_oper);
-
-       cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) |
-                        mlxsw_sp_from_ptys_supported_link(eth_proto_cap) |
-                        SUPPORTED_Pause | SUPPORTED_Asym_Pause |
-                        SUPPORTED_Autoneg;
-       cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin);
-       mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev),
-                                       eth_proto_oper, cmd);
-
-       eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
-       cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper);
-       cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper);
-
-       cmd->transceiver = XCVR_INTERNAL;
-       return 0;
-}
-
-static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising)
+static u32
+mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd)
 {
        u32 ptys_proto = 0;
        int i;
 
        for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-               if (advertising & mlxsw_sp_port_link_mode[i].advertised)
+               if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+                            cmd->link_modes.advertising))
                        ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
        }
        return ptys_proto;
@@ -1862,61 +1852,113 @@ static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed)
        return ptys_proto;
 }
 
-static int mlxsw_sp_port_set_settings(struct net_device *dev,
-                                     struct ethtool_cmd *cmd)
+static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap,
+                                            struct ethtool_link_ksettings *cmd)
+{
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
+       mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd);
+       mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported);
+}
+
+static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg,
+                                            struct ethtool_link_ksettings *cmd)
+{
+       if (!autoneg)
+               return;
+
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+       mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising);
+}
+
+static void
+mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status,
+                                   struct ethtool_link_ksettings *cmd)
+{
+       if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp)
+               return;
+
+       ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg);
+       mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising);
+}
+
+static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
+                                           struct ethtool_link_ksettings *cmd)
 {
+       u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp;
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char ptys_pl[MLXSW_REG_PTYS_LEN];
-       u32 speed;
-       u32 eth_proto_new;
-       u32 eth_proto_cap;
-       u32 eth_proto_admin;
+       u8 autoneg_status;
+       bool autoneg;
        int err;
 
-       speed = ethtool_cmd_speed(cmd);
+       autoneg = mlxsw_sp_port->link.autoneg;
+       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+       if (err)
+               return err;
+       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+                             &eth_proto_oper);
+
+       mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
+
+       mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd);
+
+       eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl);
+       autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl);
+       mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd);
+
+       cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+       cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper);
+       mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper,
+                                       cmd);
+
+       return 0;
+}
 
-       eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ?
-               mlxsw_sp_to_ptys_advert_link(cmd->advertising) :
-               mlxsw_sp_to_ptys_speed(speed);
+static int
+mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
+                                const struct ethtool_link_ksettings *cmd)
+{
+       struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       char ptys_pl[MLXSW_REG_PTYS_LEN];
+       u32 eth_proto_cap, eth_proto_new;
+       bool autoneg;
+       int err;
 
        mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-       if (err) {
-               netdev_err(dev, "Failed to get proto");
+       if (err)
                return err;
-       }
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, NULL);
+       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+
+       autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
+       eth_proto_new = autoneg ?
+               mlxsw_sp_to_ptys_advert_link(cmd) :
+               mlxsw_sp_to_ptys_speed(cmd->base.speed);
 
        eth_proto_new = eth_proto_new & eth_proto_cap;
        if (!eth_proto_new) {
-               netdev_err(dev, "Not supported proto admin requested");
+               netdev_err(dev, "No supported speed requested\n");
                return -EINVAL;
        }
-       if (eth_proto_new == eth_proto_admin)
-               return 0;
 
        mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-       if (err) {
-               netdev_err(dev, "Failed to set proto admin");
+       if (err)
                return err;
-       }
 
        if (!netif_running(dev))
                return 0;
 
-       err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
-       if (err) {
-               netdev_err(dev, "Failed to set admin status");
-               return err;
-       }
+       mlxsw_sp_port->link.autoneg = autoneg;
 
-       err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
-       if (err) {
-               netdev_err(dev, "Failed to set admin status");
-               return err;
-       }
+       mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+       mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
 
        return 0;
 }
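
The set path now derives the new PTYS admin mask either from the
advertised bitmap (autoneg on) or from a fixed speed, caches the
requested autoneg state in mlxsw_sp_port->link.autoneg (which the get
path reports; PTYS itself does not appear to carry such an admin flag,
going by how this code reads), and bounces the port down/up so the new
advertisement takes effect; the old early return on
eth_proto_new == eth_proto_admin is gone, so a running port is always
bounced. The central decision is:

	eth_proto_new = autoneg ? mlxsw_sp_to_ptys_advert_link(cmd)
				: mlxsw_sp_to_ptys_speed(cmd->base.speed);
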
@@ -1930,8 +1972,8 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
        .set_phys_id            = mlxsw_sp_port_set_phys_id,
        .get_ethtool_stats      = mlxsw_sp_port_get_stats,
        .get_sset_count         = mlxsw_sp_port_get_sset_count,
-       .get_settings           = mlxsw_sp_port_get_settings,
-       .set_settings           = mlxsw_sp_port_set_settings,
+       .get_link_ksettings     = mlxsw_sp_port_get_link_ksettings,
+       .set_link_ksettings     = mlxsw_sp_port_set_link_ksettings,
 };
 
 static int
@@ -2081,6 +2123,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        mlxsw_sp_port->mapping.module = module;
        mlxsw_sp_port->mapping.width = width;
        mlxsw_sp_port->mapping.lane = lane;
+       mlxsw_sp_port->link.autoneg = 1;
        bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE);
        mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL);
        if (!mlxsw_sp_port->active_vlans) {
@@ -2105,6 +2148,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
        dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
 
+       err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_swid_set;
+       }
+
        err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n",
@@ -2130,13 +2180,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_port_system_port_mapping_set;
        }
 
-       err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
-                       mlxsw_sp_port->local_port);
-               goto err_port_swid_set;
-       }
-
        err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
@@ -2218,10 +2261,10 @@ err_port_buffers_init:
 err_port_admin_status_set:
 err_port_mtu_set:
 err_port_speed_by_width_set:
-       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
-err_port_swid_set:
 err_port_system_port_mapping_set:
 err_dev_addr_init:
+       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
        free_percpu(mlxsw_sp_port->pcpu_stats);
 err_alloc_stats:
        kfree(mlxsw_sp_port->untagged_vlans);
@@ -2570,123 +2613,47 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port,
        netif_receive_skb(skb);
 }
 
+static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
+                                          void *priv)
+{
+       skb->offload_fwd_mark = 1;
+       return mlxsw_sp_rx_listener_func(skb, local_port, priv);
+}
+
+#define MLXSW_SP_RXL(_func, _trap_id, _action)                 \
+       {                                                       \
+               .func = _func,                                  \
+               .local_port = MLXSW_PORT_DONT_CARE,             \
+               .trap_id = MLXSW_TRAP_ID_##_trap_id,            \
+               .action = MLXSW_REG_HPKT_ACTION_##_action,      \
+       }
+
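
MLXSW_SP_RXL() collapses each five-line listener initializer into one
row and threads the new action field through, keeping the table below
readable as traps gain per-entry actions. One entry expands to exactly
the old open-coded form:

	/* MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU)
	 * expands to: */
	{
		.func = mlxsw_sp_rx_listener_func,
		.local_port = MLXSW_PORT_DONT_CARE,
		.trap_id = MLXSW_TRAP_ID_STP,
		.action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
	},
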
 static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_FDB_MC,
-       },
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
        /* Traps for specific L2 packet types, not trapped as FDB MC */
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_STP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_LACP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_EAPOL,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_LLDP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_MMRP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_MVRP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_RPVST,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_DHCP,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_ARPBC,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_ARPUC,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_MTUERROR,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_TTLERROR,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_LBERROR,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_OSPF,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IP2ME,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0,
-       },
-       {
-               .func = mlxsw_sp_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4,
-       },
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
+       /* L3 traps */
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
+       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
 };
 
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
@@ -2713,7 +2680,7 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
                if (err)
                        goto err_rx_listener_register;
 
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
+               mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
                                    mlxsw_sp_rx_listener[i].trap_id);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
                if (err)
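
The MLXSW_SP_RXL() helper above collapses each five-line listener initializer into a single entry and, for the first time, carries a per-trap HPKT action, which the registration loop now packs instead of hard-coding TRAP_TO_CPU. Entries using mlxsw_sp_rx_listener_mark_func() additionally set skb->offload_fwd_mark on the way up, telling the bridge not to forward in software a packet the ASIC already forwarded. A minimal standalone model of the expansion and the registration loop, with simplified types standing in for the mlxsw core structures:

#include <stdio.h>

/* Simplified stand-ins for struct mlxsw_rx_listener and the
 * trap/action enums used above. */
enum hpkt_action { ACTION_TRAP_TO_CPU, ACTION_MIRROR_TO_CPU };

struct rx_listener {
        void (*func)(int local_port);
        int trap_id;
        enum hpkt_action action;
};

#define RXL(_func, _trap_id, _action) \
        { .func = _func, .trap_id = _trap_id, .action = _action }

static void rx_func(int local_port)
{
        printf("packet delivered from port %d\n", local_port);
}

static const struct rx_listener listeners[] = {
        RXL(rx_func, 1 /* e.g. STP */, ACTION_TRAP_TO_CPU),
        RXL(rx_func, 2 /* e.g. ARPBC */, ACTION_MIRROR_TO_CPU),
};

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(listeners) / sizeof(listeners[0]); i++) {
                /* Mirrors mlxsw_sp_traps_init(): the action is taken
                 * from each entry rather than hard-coded. */
                printf("trap %d -> action %d\n",
                       listeners[i].trap_id, listeners[i].action);
                listeners[i].func(0); /* delivery path */
        }
        return 0;
}
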
@@ -3324,6 +3291,39 @@ static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_sp_fid_find(mlxsw_sp, fid);
 }
 
+static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
+              MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
+}
+
+static u16 mlxsw_sp_flood_table_index_get(u16 fid)
+{
+       return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
+}
+
+static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
+                                         bool set)
+{
+       enum mlxsw_flood_table_type table_type;
+       char *sftr_pl;
+       u16 index;
+       int err;
+
+       sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+       if (!sftr_pl)
+               return -ENOMEM;
+
+       table_type = mlxsw_sp_flood_table_type_get(fid);
+       index = mlxsw_sp_flood_table_index_get(fid);
+       mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type,
+                           1, MLXSW_PORT_ROUTER_PORT, set);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
+
+       kfree(sftr_pl);
+       return err;
+}
+
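
mlxsw_sp_router_port_flood_set() adds or removes the dedicated router port in a FID's broadcast (BM) flood table, picking the table type and index according to whether the FID is a regular VLAN FID or a virtual FID, as the two helpers above encode. A standalone sketch of that split, assuming vFIDs start right after the 4K VLAN FIDs (MLXSW_SP_VFID_BASE in the driver headers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VFID_BASE 4096  /* assumption: mirrors MLXSW_SP_VFID_BASE */

static bool fid_is_vfid(uint16_t fid) { return fid >= VFID_BASE; }
static uint16_t fid_to_vfid(uint16_t fid) { return fid - VFID_BASE; }

int main(void)
{
        const uint16_t fids[] = { 1, 100, 4096, 5000 };
        unsigned int i;

        for (i = 0; i < sizeof(fids) / sizeof(fids[0]); i++)
                printf("fid %u -> %s table type, index %u\n", fids[i],
                       fid_is_vfid(fids[i]) ? "FID" : "FID-offset",
                       fid_is_vfid(fids[i]) ? fid_to_vfid(fids[i]) : fids[i]);
        return 0;
}
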
 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
 {
        if (mlxsw_sp_fid_is_vfid(fid))
@@ -3360,10 +3360,14 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
        if (rif == MLXSW_SP_RIF_MAX)
                return -ERANGE;
 
-       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+       err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
        if (err)
                return err;
 
+       err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
+       if (err)
+               goto err_rif_bridge_op;
+
        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
        if (err)
                goto err_rif_fdb_op;
@@ -3385,6 +3389,8 @@ err_rif_alloc:
        mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
 err_rif_fdb_op:
        mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+err_rif_bridge_op:
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
        return err;
 }
 
@@ -3404,6 +3410,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
 
        mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
 
+       mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
+
        netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
 }
 
@@ -4500,18 +4508,26 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
        .priority = 10, /* Must be called before FIB notifier block */
 };
 
+static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
+       .notifier_call = mlxsw_sp_router_netevent_event,
+};
+
 static int __init mlxsw_sp_module_init(void)
 {
        int err;
 
        register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+       register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+
        err = mlxsw_core_driver_register(&mlxsw_sp_driver);
        if (err)
                goto err_core_driver_register;
        return 0;
 
 err_core_driver_register:
+       unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+       unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        return err;
 }
@@ -4519,6 +4535,7 @@ err_core_driver_register:
 static void __exit mlxsw_sp_module_exit(void)
 {
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
+       unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 }
index ab3feb8..969c250 100644 (file)
@@ -341,7 +341,8 @@ struct mlxsw_sp_port {
        } vport;
        struct {
                u8 tx_pause:1,
-                  rx_pause:1;
+                  rx_pause:1,
+                  autoneg:1;
        } link;
        struct {
                struct ieee_ets *ets;
@@ -558,6 +559,9 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
 int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                  enum mlxsw_reg_qeec_hr hr, u8 index,
                                  u8 next_index, u32 maxrate);
+int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                    u16 vid_begin, u16 vid_end,
+                                    bool learn_enable);
 
 #ifdef CONFIG_MLXSW_SPECTRUM_DCB
 
@@ -587,6 +591,8 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
                                    struct neighbour *n);
 void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
                                   struct neighbour *n);
+int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr);
 
 int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
 void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
index 237418a..953b214 100644 (file)
@@ -717,22 +717,18 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
        u8 local_port = mlxsw_sp_port->local_port;
        u8 pg_buff = tc_index;
        enum mlxsw_reg_sbxx_dir dir = pool_type;
-       u8 pool = pool_index;
+       u8 pool = pool_get(pool_index);
        u32 max_buff;
        int err;
 
+       if (dir != dir_get(pool_index))
+               return -EINVAL;
+
        err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
                                       threshold, &max_buff);
        if (err)
                return err;
 
-       if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) {
-               if (pool < MLXSW_SP_SB_POOL_COUNT)
-                       return -EINVAL;
-               pool -= MLXSW_SP_SB_POOL_COUNT;
-       } else if (pool >= MLXSW_SP_SB_POOL_COUNT) {
-               return -EINVAL;
-       }
        return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
                                    0, max_buff, pool);
 }
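
The reworked mlxsw_sp_sb_tc_pool_bind_set() decodes the devlink pool index through pool_get() and dir_get(), helpers added elsewhere in this patch, and rejects a bind whose direction does not match the index. Judging only from the removed arithmetic, the encoding appears to be a flat index with the first MLXSW_SP_SB_POOL_COUNT entries ingress and the rest egress; a hypothetical decoder along those lines:

#include <stdint.h>

#define POOL_COUNT 4    /* assumption: stands in for MLXSW_SP_SB_POOL_COUNT */

enum sb_dir { SB_DIR_INGRESS, SB_DIR_EGRESS };

/* Hypothetical equivalents of the pool_get()/dir_get() helpers. */
static uint8_t pool_get(uint16_t pool_index)
{
        return pool_index % POOL_COUNT;
}

static enum sb_dir dir_get(uint16_t pool_index)
{
        return pool_index < POOL_COUNT ? SB_DIR_INGRESS : SB_DIR_EGRESS;
}

int main(void)
{
        /* Index 5 would name egress pool 1 under this encoding. */
        return pool_get(5) == 1 && dir_get(5) == SB_DIR_EGRESS ? 0 : 1;
}
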
index 90bb93b..3f5c51d 100644 (file)
@@ -107,6 +107,7 @@ mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
 }
 
 struct mlxsw_sp_fib_key {
+       struct net_device *dev;
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
 };
@@ -123,7 +124,7 @@ struct mlxsw_sp_fib_entry {
        struct rhash_head ht_node;
        struct mlxsw_sp_fib_key key;
        enum mlxsw_sp_fib_entry_type type;
-       u8 added:1;
+       unsigned int ref_count;
        u16 rif; /* used for action local */
        struct mlxsw_sp_vr *vr;
        struct list_head nexthop_group_node;
@@ -171,13 +172,15 @@ static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib,
 
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
-                         size_t addr_len, unsigned char prefix_len)
+                         size_t addr_len, unsigned char prefix_len,
+                         struct net_device *dev)
 {
        struct mlxsw_sp_fib_entry *fib_entry;
 
        fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
        if (!fib_entry)
                return NULL;
+       fib_entry->key.dev = dev;
        memcpy(fib_entry->key.addr, addr, addr_len);
        fib_entry->key.prefix_len = prefix_len;
        return fib_entry;
@@ -190,10 +193,13 @@ static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry)
 
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
-                         size_t addr_len, unsigned char prefix_len)
+                         size_t addr_len, unsigned char prefix_len,
+                         struct net_device *dev)
 {
-       struct mlxsw_sp_fib_key key = {{ 0 } };
+       struct mlxsw_sp_fib_key key;
 
+       memset(&key, 0, sizeof(key));
+       key.dev = dev;
        memcpy(key.addr, addr, addr_len);
        key.prefix_len = prefix_len;
        return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
@@ -657,7 +663,7 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
                return 0;
        }
 
-       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+       r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (WARN_ON(!r))
                return -EINVAL;
 
@@ -938,8 +944,8 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
        mlxsw_sp_port_dev_put(mlxsw_sp_port);
 }
 
-static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
-                                         unsigned long event, void *ptr)
+int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp_port *mlxsw_sp_port;
@@ -1009,10 +1015,6 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
        return NOTIFY_DONE;
 }
 
-static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
-       .notifier_call = mlxsw_sp_router_netevent_event,
-};
-
 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
 {
        int err;
@@ -1027,10 +1029,6 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
         */
        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
 
-       err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
-       if (err)
-               goto err_register_netevent_notifier;
-
        /* Create the delayed works for the activity_update */
        INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
                          mlxsw_sp_router_neighs_update_work);
@@ -1039,17 +1037,12 @@ static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
        mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
        mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
        return 0;
-
-err_register_netevent_notifier:
-       rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
-       return err;
 }
 
 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 {
        cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
        cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
-       unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
 }
 
@@ -1524,7 +1517,14 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
                return err;
        mlxsw_sp_lpm_init(mlxsw_sp);
        mlxsw_sp_vrs_init(mlxsw_sp);
-       return mlxsw_sp_neigh_init(mlxsw_sp);
+       err = mlxsw_sp_neigh_init(mlxsw_sp);
+       if (err)
+               goto err_neigh_init;
+       return 0;
+
+err_neigh_init:
+       __mlxsw_sp_router_fini(mlxsw_sp);
+       return err;
 }
 
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
@@ -1626,11 +1626,8 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
                                     struct mlxsw_sp_fib_entry *fib_entry)
 {
-       enum mlxsw_reg_ralue_op op;
-
-       op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE :
-                                MLXSW_REG_RALUE_OP_WRITE_UPDATE;
-       return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
+       return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+                                    MLXSW_REG_RALUE_OP_WRITE_WRITE);
 }
 
 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
@@ -1695,34 +1692,93 @@ mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
        mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
 }
 
-static int
-mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
-                                const struct switchdev_obj_ipv4_fib *fib4,
-                                struct switchdev_trans *trans)
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
+                      const struct switchdev_obj_ipv4_fib *fib4)
 {
-       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-       struct mlxsw_sp_router_fib4_add_info *info;
        struct mlxsw_sp_fib_entry *fib_entry;
+       struct fib_info *fi = fib4->fi;
        struct mlxsw_sp_vr *vr;
        int err;
 
        vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id,
                             MLXSW_SP_L3_PROTO_IPV4);
        if (IS_ERR(vr))
-               return PTR_ERR(vr);
+               return ERR_CAST(vr);
 
+       fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
+                                             sizeof(fib4->dst),
+                                             fib4->dst_len, fi->fib_dev);
+       if (fib_entry) {
+               /* Already exists, just take a reference */
+               fib_entry->ref_count++;
+               return fib_entry;
+       }
        fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst,
-                                             sizeof(fib4->dst), fib4->dst_len);
+                                             sizeof(fib4->dst),
+                                             fib4->dst_len, fi->fib_dev);
        if (!fib_entry) {
                err = -ENOMEM;
                goto err_fib_entry_create;
        }
        fib_entry->vr = vr;
+       fib_entry->ref_count = 1;
 
        err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry);
        if (err)
                goto err_fib4_entry_init;
 
+       return fib_entry;
+
+err_fib4_entry_init:
+       mlxsw_sp_fib_entry_destroy(fib_entry);
+err_fib_entry_create:
+       mlxsw_sp_vr_put(mlxsw_sp, vr);
+
+       return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
+                       const struct switchdev_obj_ipv4_fib *fib4)
+{
+       struct mlxsw_sp_vr *vr;
+
+       vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4);
+       if (!vr)
+               return NULL;
+
+       return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
+                                        sizeof(fib4->dst), fib4->dst_len,
+                                        fib4->fi->fib_dev);
+}
+
+void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
+                           struct mlxsw_sp_fib_entry *fib_entry)
+{
+       struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+       if (--fib_entry->ref_count == 0) {
+               mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
+               mlxsw_sp_fib_entry_destroy(fib_entry);
+       }
+       mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
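
From here on, identical routes (same destination, prefix length and device) share a single hardware FIB entry through ref_count: mlxsw_sp_fib_entry_get() bumps the count on a lookup hit and creates a fresh entry only on a miss, while mlxsw_sp_fib_entry_put() tears the entry down once the last user drops it. The commit and delete paths below touch the hardware only when ref_count is 1 for the same reason. The pattern in isolation, with the hash table and register writes elided:

#include <stdlib.h>

struct entry {
        unsigned int ref_count;
        /* key and hardware state elided */
};

/* 'existing' models a hash-table lookup hit (or NULL on a miss). */
static struct entry *entry_get(struct entry *existing)
{
        struct entry *e;

        if (existing) {
                existing->ref_count++;  /* share the offloaded entry */
                return existing;
        }
        e = calloc(1, sizeof(*e));
        if (e)
                e->ref_count = 1;       /* first user programs the HW */
        return e;
}

static void entry_put(struct entry *e)
{
        if (--e->ref_count == 0)
                free(e);                /* last user removes it from HW */
}

int main(void)
{
        struct entry *a = entry_get(NULL);      /* miss: create */
        struct entry *b;

        if (!a)
                return 1;
        b = entry_get(a);                       /* hit: ref_count == 2 */
        entry_put(b);
        entry_put(a);                           /* freed here */
        return 0;
}
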
+static int
+mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
+                                const struct switchdev_obj_ipv4_fib *fib4,
+                                struct switchdev_trans *trans)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_router_fib4_add_info *info;
+       struct mlxsw_sp_fib_entry *fib_entry;
+       int err;
+
+       fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4);
+       if (IS_ERR(fib_entry))
+               return PTR_ERR(fib_entry);
+
        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                err = -ENOMEM;
@@ -1736,11 +1792,7 @@ mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
        return 0;
 
 err_alloc_info:
-       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
-err_fib4_entry_init:
-       mlxsw_sp_fib_entry_destroy(fib_entry);
-err_fib_entry_create:
-       mlxsw_sp_vr_put(mlxsw_sp, vr);
+       mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
        return err;
 }
 
@@ -1759,11 +1811,14 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
        fib_entry = info->fib_entry;
        kfree(info);
 
+       if (fib_entry->ref_count != 1)
+               return 0;
+
        vr = fib_entry->vr;
-       err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry);
+       err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
        if (err)
                goto err_fib_entry_insert;
-       err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+       err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry);
        if (err)
                goto err_fib_entry_add;
        return 0;
@@ -1771,9 +1826,7 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
 err_fib_entry_add:
        mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
 err_fib_entry_insert:
-       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
-       mlxsw_sp_fib_entry_destroy(fib_entry);
-       mlxsw_sp_vr_put(mlxsw_sp, vr);
+       mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
        return err;
 }
 
@@ -1793,23 +1846,18 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct mlxsw_sp_fib_entry *fib_entry;
-       struct mlxsw_sp_vr *vr;
 
-       vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4);
-       if (!vr) {
-               dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n");
-               return -ENOENT;
-       }
-       fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
-                                             sizeof(fib4->dst), fib4->dst_len);
+       fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4);
        if (!fib_entry) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n");
                return -ENOENT;
        }
-       mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
-       mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
-       mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
-       mlxsw_sp_fib_entry_destroy(fib_entry);
-       mlxsw_sp_vr_put(mlxsw_sp, vr);
+
+       if (fib_entry->ref_count == 1) {
+               mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
+               mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
+       }
+
+       mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
        return 0;
 }
index d1b59cd..7186c48 100644 (file)
@@ -167,8 +167,8 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
-                                    u16 idx_begin, u16 idx_end, bool set,
-                                    bool only_uc)
+                                    u16 idx_begin, u16 idx_end, bool uc_set,
+                                    bool bm_set)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        u16 local_port = mlxsw_sp_port->local_port;
@@ -187,28 +187,22 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
                return -ENOMEM;
 
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-                           table_type, range, local_port, set);
+                           table_type, range, local_port, uc_set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
        if (err)
                goto buffer_out;
 
-       /* Flooding control allows one to decide whether a given port will
-        * flood unicast traffic for which there is no FDB entry.
-        */
-       if (only_uc)
-               goto buffer_out;
-
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin,
-                           table_type, range, local_port, set);
+                           table_type, range, local_port, bm_set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
        if (err)
                goto err_flood_bm_set;
-       else
-               goto buffer_out;
+
+       goto buffer_out;
 
 err_flood_bm_set:
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-                           table_type, range, local_port, !set);
+                           table_type, range, local_port, !uc_set);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
 buffer_out:
        kfree(sftr_pl);
@@ -257,16 +251,43 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
         * the start of the vFIDs range.
         */
        vfid = mlxsw_sp_fid_to_vfid(fid);
-       return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set,
-                                        false);
+       return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set);
+}
+
+static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                     bool set)
+{
+       u16 vid;
+       int err;
+
+       if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+               vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+
+               return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+                                                       set);
+       }
+
+       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) {
+               err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+                                                      set);
+               if (err)
+                       goto err_port_vid_learning_set;
+       }
+
+       return 0;
+
+err_port_vid_learning_set:
+       for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+               __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set);
+       return err;
 }
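
mlxsw_sp_port_learning_set() applies the new learning state to every active VLAN and, if any write fails, walks the same bitmap again to restore the previous state. The unwind covers all active VLANs, not just the ones already flipped, which is harmless as long as the per-VLAN write is idempotent (a plain register write presumably is). A generic sketch of the apply-or-roll-back loop:

#include <stdbool.h>

#define NVIDS 8

/* Stand-in for the per-VLAN register write; a real implementation
 * could fail on any element. */
static int apply_one(int vid, bool enable)
{
        (void)vid;
        (void)enable;
        return 0;
}

static int apply_all(const bool *active, bool enable)
{
        int vid, err = 0;

        for (vid = 0; vid < NVIDS; vid++) {
                if (!active[vid])
                        continue;
                err = apply_one(vid, enable);
                if (err)
                        goto rollback;
        }
        return 0;

rollback:
        /* Restore the previous state on every active VLAN; rewriting
         * untouched ones just reasserts the value they already hold. */
        for (vid = 0; vid < NVIDS; vid++)
                if (active[vid])
                        apply_one(vid, !enable);
        return err;
}

int main(void)
{
        const bool active[NVIDS] = { true, false, true };

        return apply_all(active, true);
}
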
 
 static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                           struct switchdev_trans *trans,
                                           unsigned long brport_flags)
 {
+       unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0;
        unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0;
-       bool set;
        int err;
 
        if (!mlxsw_sp_port->bridged)
@@ -276,17 +297,30 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
                return 0;
 
        if ((uc_flood ^ brport_flags) & BR_FLOOD) {
-               set = mlxsw_sp_port->uc_flood ? false : true;
-               err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set);
+               err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
+                                                !mlxsw_sp_port->uc_flood);
                if (err)
                        return err;
        }
 
+       if ((learning ^ brport_flags) & BR_LEARNING) {
+               err = mlxsw_sp_port_learning_set(mlxsw_sp_port,
+                                                !mlxsw_sp_port->learning);
+               if (err)
+                       goto err_port_learning_set;
+       }
+
        mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0;
        mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0;
        mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0;
 
        return 0;
+
+err_port_learning_set:
+       if ((uc_flood ^ brport_flags) & BR_FLOOD)
+               mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
+                                          mlxsw_sp_port->uc_flood);
+       return err;
 }
 
 static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time)
@@ -460,6 +494,9 @@ static int __mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port,
 {
        struct mlxsw_sp_fid *f;
 
+       if (test_bit(fid, mlxsw_sp_port->active_vlans))
+               return 0;
+
        f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid);
        if (!f) {
                f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid);
@@ -517,7 +554,7 @@ static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port,
        }
 
        err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end,
-                                       true, false);
+                                       mlxsw_sp_port->uc_flood, true);
        if (err)
                goto err_port_flood_set;
 
@@ -635,6 +672,27 @@ static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port,
        return 0;
 }
 
+static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+                                         u16 vid_begin, u16 vid_end,
+                                         bool learn_enable)
+{
+       u16 vid, vid_e;
+       int err;
+
+       for (vid = vid_begin; vid <= vid_end;
+            vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) {
+               vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1),
+                           vid_end);
+
+               err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid,
+                                                      vid_e, learn_enable);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
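
mlxsw_sp_port_vid_learning_set() respects the register's record limit: a VID range wider than MLXSW_REG_SPVMLR_REC_MAX_COUNT is split into chunks, each programmed with a single register write. The chunking arithmetic on its own, with an illustrative record limit:

#include <stdint.h>
#include <stdio.h>

#define REC_MAX 255     /* illustrative stand-in for MLXSW_REG_SPVMLR_REC_MAX_COUNT */

int main(void)
{
        const uint16_t vid_begin = 1, vid_end = 600;
        uint16_t vid, vid_e;

        for (vid = vid_begin; vid <= vid_end; vid += REC_MAX) {
                vid_e = vid + REC_MAX - 1 < vid_end ?
                        vid + REC_MAX - 1 : vid_end;
                /* Prints 1-255, 256-510, 511-600. */
                printf("program VIDs %u-%u in one register write\n",
                       vid, vid_e);
        }
        return 0;
}
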
 static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
                                     u16 vid_begin, u16 vid_end,
                                     bool flag_untagged, bool flag_pvid)
@@ -675,6 +733,14 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
                }
        }
 
+       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+                                            mlxsw_sp_port->learning);
+       if (err) {
+               netdev_err(dev, "Failed to set learning for VIDs %d-%d\n",
+                          vid_begin, vid_end);
+               goto err_port_vid_learning_set;
+       }
+
        /* Changing activity bits only if HW operation succeeded */
        for (vid = vid_begin; vid <= vid_end; vid++) {
                set_bit(vid, mlxsw_sp_port->active_vlans);
@@ -697,6 +763,9 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 err_port_stp_state_set:
        for (vid = vid_begin; vid <= vid_end; vid++)
                clear_bit(vid, mlxsw_sp_port->active_vlans);
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+                                      false);
+err_port_vid_learning_set:
        if (old_pvid != mlxsw_sp_port->pvid)
                mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid);
 err_port_pvid_set:
@@ -1001,29 +1070,20 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
 static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                     u16 vid_begin, u16 vid_end)
 {
-       struct net_device *dev = mlxsw_sp_port->dev;
        u16 vid, pvid;
-       int err;
 
        if (!mlxsw_sp_port->bridged)
                return -EINVAL;
 
-       err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
-                                       false, false);
-       if (err) {
-               netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin,
-                          vid_end);
-               return err;
-       }
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+                                      false);
 
        pvid = mlxsw_sp_port->pvid;
-       if (pvid >= vid_begin && pvid <= vid_end) {
-               err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
-               if (err) {
-                       netdev_err(dev, "Unable to del PVID %d\n", pvid);
-                       return err;
-               }
-       }
+       if (pvid >= vid_begin && pvid <= vid_end)
+               mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
+
+       __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false,
+                                 false);
 
        mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
 
@@ -1366,8 +1426,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
                vid = fid;
        }
 
-       adding = adding && mlxsw_sp_port->learning;
-
 do_fdb_op:
        err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
                                      adding, true);
@@ -1429,8 +1487,6 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp,
                vid = fid;
        }
 
-       adding = adding && mlxsw_sp_port->learning;
-
 do_fdb_op:
        err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
                                          adding, true);
@@ -1496,20 +1552,18 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work)
        mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
        rtnl_lock();
-       do {
-               mlxsw_reg_sfn_pack(sfn_pl);
-               err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
-               if (err) {
-                       dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
-                       break;
-               }
-               num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
-               for (i = 0; i < num_rec; i++)
-                       mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
+       mlxsw_reg_sfn_pack(sfn_pl);
+       err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
+       if (err) {
+               dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
+               goto out;
+       }
+       num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
+       for (i = 0; i < num_rec; i++)
+               mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
-       } while (num_rec);
+out:
        rtnl_unlock();
-
        kfree(sfn_pl);
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
 }
index eb807b0..569ade6 100644 (file)
@@ -134,7 +134,7 @@ static int lnksts = 0;              /* CFG_LNKSTS bit polarity */
 
 /* tunables */
 #define RX_BUF_SIZE    1500    /* 8192 */
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define NS83820_VLAN_ACCEL_SUPPORT
 #endif
 
index 88678c1..252e492 100644 (file)
@@ -41,7 +41,6 @@
  *          Chris Telfer <chris.telfer@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -1441,10 +1440,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
                nfp_net_set_hash(nn->netdev, skb, rxd);
 
-               /* Pad small frames to minimum */
-               if (skb_put_padto(skb, 60))
-                       break;
-
                /* Stats update */
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->rx_pkts++;
index 7d7933d..4c98972 100644 (file)
@@ -40,7 +40,6 @@
  *          Brad Petrus <brad.petrus@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
index 37abef0..f7062cb 100644 (file)
@@ -38,7 +38,6 @@
  *         Rolf Neugebauer <rolf.neugebauer@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -134,7 +133,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        }
 
        nfp_net_get_fw_version(&fw_ver, ctrl_bar);
-       if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
+       if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
                dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n",
                        fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor);
                err = -EINVAL;
@@ -142,9 +141,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        }
 
        /* Determine stride */
-       if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) ||
-           nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) ||
-           nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) {
+       if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1)) {
                stride = 2;
                tx_bar_no = NFP_NET_Q0_BAR;
                rx_bar_no = NFP_NET_Q1_BAR;
index 87b7b81..712d8bc 100644 (file)
@@ -751,7 +751,7 @@ static void netdev_rx(struct net_device *dev)
                                dev_err(&pdev->dev, "rx crc err\n");
                                ether->stats.rx_crc_errors++;
                        } else if (status & RXDS_ALIE) {
-                               dev_err(&pdev->dev, "rx aligment err\n");
+                               dev_err(&pdev->dev, "rx alignment err\n");
                                ether->stats.rx_frame_errors++;
                        } else if (status & RXDS_PTLE) {
                                dev_err(&pdev->dev, "rx longer err\n");
index 4d4ecba..8e13ec8 100644 (file)
@@ -475,14 +475,6 @@ static void __lpc_get_mac(struct netdata_local *pldat, u8 *mac)
        mac[5] = tmp >> 8;
 }
 
-static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable)
-{
-       if (enable)
-               clk_prepare_enable(pldat->clk);
-       else
-               clk_disable_unprepare(pldat->clk);
-}
-
 static void __lpc_params_setup(struct netdata_local *pldat)
 {
        u32 tmp;
@@ -1056,7 +1048,7 @@ static int lpc_eth_close(struct net_device *ndev)
        writel(0, LPC_ENET_MAC2(pldat->net_base));
        spin_unlock_irqrestore(&pldat->lock, flags);
 
-       __lpc_eth_clock_enable(pldat, false);
+       clk_disable_unprepare(pldat->clk);
 
        return 0;
 }
@@ -1197,11 +1189,14 @@ static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 static int lpc_eth_open(struct net_device *ndev)
 {
        struct netdata_local *pldat = netdev_priv(ndev);
+       int ret;
 
        if (netif_msg_ifup(pldat))
                dev_dbg(&pldat->pdev->dev, "enabling %s\n", ndev->name);
 
-       __lpc_eth_clock_enable(pldat, true);
+       ret = clk_prepare_enable(pldat->clk);
+       if (ret)
+               return ret;
 
        /* Suspended PHY makes LPC ethernet core block, so resume now */
        phy_resume(ndev->phydev);
@@ -1320,7 +1315,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
        }
 
        /* Enable network clock */
-       __lpc_eth_clock_enable(pldat, true);
+       ret = clk_prepare_enable(pldat->clk);
+       if (ret)
+               goto err_out_clk_put;
 
        /* Map IO space */
        pldat->net_base = ioremap(res->start, resource_size(res));
@@ -1454,6 +1451,7 @@ err_out_iounmap:
        iounmap(pldat->net_base);
 err_out_disable_clocks:
        clk_disable_unprepare(pldat->clk);
+err_out_clk_put:
        clk_put(pldat->clk);
 err_out_free_dev:
        free_netdev(ndev);
index d1f157e..86a5b4f 100644 (file)
@@ -2,5 +2,5 @@ obj-$(CONFIG_QED) := qed.o
 
 qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
         qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \
-        qed_selftest.o qed_dcbx.o
+        qed_selftest.o qed_dcbx.o qed_debug.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
index 35e5377..0929582 100644 (file)
 #include <linux/zlib.h>
 #include <linux/hashtable.h>
 #include <linux/qed/qed_if.h>
+#include "qed_debug.h"
 #include "qed_hsi.h"
 
 extern const struct qed_common_ops qed_common_ops_pass;
-#define DRV_MODULE_VERSION "8.7.1.20"
+#define DRV_MODULE_VERSION "8.10.9.20"
 
 #define MAX_HWFNS_PER_DEVICE    (4)
 #define NAME_SIZE 16
@@ -42,6 +43,8 @@ enum qed_coalescing_mode {
 
 struct qed_eth_cb_ops;
 struct qed_dev_info;
+union qed_mcp_protocol_stats;
+enum qed_mcp_protocol_type;
 
 /* helpers */
 static inline u32 qed_db_addr(u32 cid, u32 DEMS)
@@ -393,6 +396,8 @@ struct qed_hwfn {
        /* Buffer for unzipping firmware data */
        void                            *unzip_buf;
 
+       struct dbg_tools_data           dbg_info;
+
        struct qed_simd_fp_handler      simd_proto_handler[64];
 
 #ifdef CONFIG_QED_SRIOV
@@ -428,6 +433,19 @@ struct qed_int_params {
        u8                      fp_msix_cnt;
 };
 
+struct qed_dbg_feature {
+       struct dentry *dentry;
+       u8 *dump_buf;
+       u32 buf_size;
+       u32 dumped_dwords;
+};
+
+struct qed_dbg_params {
+       struct qed_dbg_feature features[DBG_FEATURE_NUM];
+       u8 engine_for_debug;
+       bool print_data;
+};
+
 struct qed_dev {
        u32     dp_module;
        u8      dp_level;
@@ -442,6 +460,8 @@ struct qed_dev {
                                 CHIP_REV_IS_A0(dev))
 #define QED_IS_BB_B0(dev)       (QED_IS_BB(dev) && \
                                 CHIP_REV_IS_B0(dev))
+#define QED_IS_AH(dev)  ((dev)->type == QED_DEV_TYPE_AH)
+#define QED_IS_K2(dev)  QED_IS_AH(dev)
 
 #define QED_GET_TYPE(dev)       (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \
                                 QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2)
@@ -542,6 +562,8 @@ struct qed_dev {
        } protocol_ops;
        void                            *ops_cookie;
 
+       struct qed_dbg_params           dbg_params;
+
        const struct firmware           *firmware;
 };
 
@@ -561,9 +583,18 @@ struct qed_dev {
 static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
                                        u32 concrete_fid)
 {
+       u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID);
        u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID);
+       u8 vf_valid = GET_FIELD(concrete_fid,
+                               PXP_CONCRETE_FID_VFVALID);
+       u8 sw_fid;
 
-       return pfid;
+       if (vf_valid)
+               sw_fid = vfid + MAX_NUM_PFS;
+       else
+               sw_fid = pfid;
+
+       return sw_fid;
 }
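
qed_concrete_to_sw_fid() now honours the VF-valid bit in the concrete FID: software function IDs enumerate all PFs first and then VFs, so a valid VF lands past MAX_NUM_PFS. A standalone model with the field extraction already done and an illustrative PF count:

#include <stdint.h>
#include <stdio.h>

#define MAX_NUM_PFS 16  /* assumption: illustrative value */

static uint8_t concrete_to_sw_fid(uint8_t pfid, uint8_t vfid, int vf_valid)
{
        /* VFs are numbered after all PFs in the software FID space. */
        return vf_valid ? vfid + MAX_NUM_PFS : pfid;
}

int main(void)
{
        printf("PF 2         -> sw_fid %u\n", concrete_to_sw_fid(2, 0, 0));
        printf("VF 3 (valid) -> sw_fid %u\n", concrete_to_sw_fid(2, 3, 1));
        return 0;
}
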
 
 #define PURE_LB_TC 8
@@ -597,7 +628,9 @@ void qed_link_update(struct qed_hwfn *hwfn);
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
                   u32 input_len, u8 *input_buf,
                   u32 max_size, u8 *unzip_buf);
-
+void qed_get_protocol_stats(struct qed_dev *cdev,
+                           enum qed_mcp_protocol_type type,
+                           union qed_mcp_protocol_stats *stats);
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn);
 
 #endif /* _QED_H */
index 5476927..dd579b2 100644 (file)
@@ -792,10 +792,9 @@ static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn)
        p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz);
 
        /* allocate t2 */
-       p_mngr->t2 = kzalloc(p_mngr->t2_num_pages * sizeof(struct qed_dma_mem),
+       p_mngr->t2 = kcalloc(p_mngr->t2_num_pages, sizeof(struct qed_dma_mem),
                             GFP_KERNEL);
        if (!p_mngr->t2) {
-               DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n");
                rc = -ENOMEM;
                goto t2_fail;
        }
@@ -957,7 +956,6 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn)
        p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem),
                                     GFP_KERNEL);
        if (!p_mngr->ilt_shadow) {
-               DP_NOTICE(p_hwfn, "Failed to allocate ilt shadow table\n");
                rc = -ENOMEM;
                goto ilt_shadow_fail;
        }
@@ -1050,10 +1048,8 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn)
        u32 i;
 
        p_mngr = kzalloc(sizeof(*p_mngr), GFP_KERNEL);
-       if (!p_mngr) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n");
+       if (!p_mngr)
                return -ENOMEM;
-       }
 
        /* Initialize ILT client registers */
        clients = p_mngr->clients;
@@ -1105,24 +1101,18 @@ int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn)
 
        /* Allocate the ILT shadow table */
        rc = qed_ilt_shadow_alloc(p_hwfn);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to allocate ilt memory\n");
+       if (rc)
                goto tables_alloc_fail;
-       }
 
        /* Allocate the T2  table */
        rc = qed_cxt_src_t2_alloc(p_hwfn);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to allocate T2 memory\n");
+       if (rc)
                goto tables_alloc_fail;
-       }
 
        /* Allocate and initialize the acquired cids bitmaps */
        rc = qed_cid_map_alloc(p_hwfn);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to allocate cid maps\n");
+       if (rc)
                goto tables_alloc_fail;
-       }
 
        return 0;
 
index b900dfb..130da1c 100644 (file)
@@ -19,6 +19,7 @@
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
 #include "qed_sp.h"
+#include "qed_sriov.h"
 #ifdef CONFIG_DCB
 #include <linux/qed/qed_eth_if.h>
 #endif
@@ -874,11 +875,8 @@ int qed_dcbx_info_alloc(struct qed_hwfn *p_hwfn)
        int rc = 0;
 
        p_hwfn->p_dcbx_info = kzalloc(sizeof(*p_hwfn->p_dcbx_info), GFP_KERNEL);
-       if (!p_hwfn->p_dcbx_info) {
-               DP_NOTICE(p_hwfn,
-                         "Failed to allocate 'struct qed_dcbx_info'\n");
+       if (!p_hwfn->p_dcbx_info)
                rc = -ENOMEM;
-       }
 
        return rc;
 }
@@ -945,6 +943,9 @@ static int qed_dcbx_query_params(struct qed_hwfn *p_hwfn,
        struct qed_ptt *p_ptt;
        int rc;
 
+       if (IS_VF(p_hwfn->cdev))
+               return -EINVAL;
+
        p_ptt = qed_ptt_acquire(p_hwfn);
        if (!p_ptt)
                return -EBUSY;
@@ -984,6 +985,7 @@ qed_dcbx_set_pfc_data(struct qed_hwfn *p_hwfn,
                if (p_params->pfc.prio[i])
                        pfc_map |= BIT(i);
 
+       *pfc &= ~DCBX_PFC_PRI_EN_BITMAP_MASK;
        *pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_SHIFT);
 
        DP_VERBOSE(p_hwfn, QED_MSG_DCB, "pfc = 0x%x\n", *pfc);
@@ -1058,24 +1060,33 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
 
        for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                entry = &p_app->app_pri_tbl[i].entry;
+               *entry = 0;
                if (ieee) {
-                       *entry &= ~DCBX_APP_SF_IEEE_MASK;
+                       *entry &= ~(DCBX_APP_SF_IEEE_MASK | DCBX_APP_SF_MASK);
                        switch (p_params->app_entry[i].sf_ieee) {
                        case QED_DCBX_SF_IEEE_ETHTYPE:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_TCP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_UDP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        }
                } else {
@@ -1175,11 +1186,9 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
                return 0;
        }
 
-       dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
-       if (!dcbx_info) {
-               DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n");
+       dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+       if (!dcbx_info)
                return -ENOMEM;
-       }
 
        rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB);
        if (rc) {
@@ -1212,11 +1221,9 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn,
 {
        struct qed_dcbx_get *dcbx_info;
 
-       dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
-       if (!dcbx_info) {
-               DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n");
+       dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+       if (!dcbx_info)
                return NULL;
-       }
 
        if (qed_dcbx_query_params(hwfn, dcbx_info, type)) {
                kfree(dcbx_info);
@@ -2137,17 +2144,19 @@ static int qed_dcbnl_ieee_setets(struct qed_dev *cdev, struct ieee_ets *ets)
        return rc;
 }
 
-int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
+static int
+qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
 {
        return qed_dcbnl_get_ieee_ets(cdev, ets, true);
 }
 
-int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
+static int
+qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
 {
        return qed_dcbnl_get_ieee_pfc(cdev, pfc, true);
 }
 
-int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
+static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
 {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        struct qed_dcbx_get *dcbx_info;
@@ -2191,7 +2200,7 @@ int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
        return 0;
 }
 
-int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
+static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
 {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        struct qed_dcbx_get *dcbx_info;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
new file mode 100644 (file)
index 0000000..88e7d5b
--- /dev/null
@@ -0,0 +1,6898 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/crc32.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+
+/* Chip IDs enum */
+enum chip_ids {
+       CHIP_RESERVED,
+       CHIP_BB_B0,
+       CHIP_K2,
+       MAX_CHIP_IDS
+};
+
+/* Memory groups enum */
+enum mem_groups {
+       MEM_GROUP_PXP_MEM,
+       MEM_GROUP_DMAE_MEM,
+       MEM_GROUP_CM_MEM,
+       MEM_GROUP_QM_MEM,
+       MEM_GROUP_TM_MEM,
+       MEM_GROUP_BRB_RAM,
+       MEM_GROUP_BRB_MEM,
+       MEM_GROUP_PRS_MEM,
+       MEM_GROUP_SDM_MEM,
+       MEM_GROUP_PBUF,
+       MEM_GROUP_IOR,
+       MEM_GROUP_RAM,
+       MEM_GROUP_BTB_RAM,
+       MEM_GROUP_RDIF_CTX,
+       MEM_GROUP_TDIF_CTX,
+       MEM_GROUP_CONN_CFC_MEM,
+       MEM_GROUP_TASK_CFC_MEM,
+       MEM_GROUP_CAU_PI,
+       MEM_GROUP_CAU_MEM,
+       MEM_GROUP_PXP_ILT,
+       MEM_GROUP_MULD_MEM,
+       MEM_GROUP_BTB_MEM,
+       MEM_GROUP_IGU_MEM,
+       MEM_GROUP_IGU_MSIX,
+       MEM_GROUP_CAU_SB,
+       MEM_GROUP_BMB_RAM,
+       MEM_GROUP_BMB_MEM,
+       MEM_GROUPS_NUM
+};
+
+/* Memory groups names */
+static const char * const s_mem_group_names[] = {
+       "PXP_MEM",
+       "DMAE_MEM",
+       "CM_MEM",
+       "QM_MEM",
+       "TM_MEM",
+       "BRB_RAM",
+       "BRB_MEM",
+       "PRS_MEM",
+       "SDM_MEM",
+       "PBUF",
+       "IOR",
+       "RAM",
+       "BTB_RAM",
+       "RDIF_CTX",
+       "TDIF_CTX",
+       "CONN_CFC_MEM",
+       "TASK_CFC_MEM",
+       "CAU_PI",
+       "CAU_MEM",
+       "PXP_ILT",
+       "MULD_MEM",
+       "BTB_MEM",
+       "IGU_MEM",
+       "IGU_MSIX",
+       "CAU_SB",
+       "BMB_RAM",
+       "BMB_MEM",
+};
+
+/* Idle check conditions */
+static u32 cond4(const u32 *r, const u32 *imm)
+{
+       return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]);
+}
+
+static u32 cond6(const u32 *r, const u32 *imm)
+{
+       return ((r[0] >> imm[0]) & imm[1]) != imm[2];
+}
+
+static u32 cond5(const u32 *r, const u32 *imm)
+{
+       return (r[0] & imm[0]) != imm[1];
+}
+
+static u32 cond8(const u32 *r, const u32 *imm)
+{
+       return ((r[0] & imm[0]) >> imm[1]) !=
+           (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5]));
+}
+
+static u32 cond9(const u32 *r, const u32 *imm)
+{
+       return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]);
+}
+
+static u32 cond1(const u32 *r, const u32 *imm)
+{
+       return (r[0] & ~imm[0]) != imm[1];
+}
+
+static u32 cond0(const u32 *r, const u32 *imm)
+{
+       return r[0] != imm[0];
+}
+
+static u32 cond10(const u32 *r, const u32 *imm)
+{
+       return r[0] != r[1] && r[2] == imm[0];
+}
+
+static u32 cond11(const u32 *r, const u32 *imm)
+{
+       return r[0] != r[1] && r[2] > imm[0];
+}
+
+static u32 cond3(const u32 *r, const u32 *imm)
+{
+       return r[0] != r[1];
+}
+
+static u32 cond12(const u32 *r, const u32 *imm)
+{
+       return r[0] & imm[0];
+}
+
+static u32 cond7(const u32 *r, const u32 *imm)
+{
+       return r[0] < (r[1] - imm[0]);
+}
+
+static u32 cond2(const u32 *r, const u32 *imm)
+{
+       return r[0] > imm[0];
+}
+
+/* Array of Idle Check conditions */
+static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = {
+       cond0,
+       cond1,
+       cond2,
+       cond3,
+       cond4,
+       cond5,
+       cond6,
+       cond7,
+       cond8,
+       cond9,
+       cond10,
+       cond11,
+       cond12,
+};
+
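
cond_arr turns the idle-check conditions into a dispatch table: each idle-check rule embedded in the generated debug data names a condition by index and supplies the register values and immediates to test, so the dump code can evaluate rules generically. A sketch of that evaluation, building on cond_arr above; the rule record here is hypothetical, the real layout comes from the generated arrays:

struct idle_chk_rule {
        u8 cond_id;             /* index into cond_arr[] */
        u32 reg_vals[3];        /* values read from the chip */
        u32 imm_vals[6];        /* immediates from the rule data */
};

static bool idle_chk_rule_fails(const struct idle_chk_rule *rule)
{
        /* A non-zero condition result means the check failed. */
        return cond_arr[rule->cond_id](rule->reg_vals, rule->imm_vals);
}
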
+/******************************* Data Types **********************************/
+
+enum platform_ids {
+       PLATFORM_ASIC,
+       PLATFORM_RESERVED,
+       PLATFORM_RESERVED2,
+       PLATFORM_RESERVED3,
+       MAX_PLATFORM_IDS
+};
+
+struct dbg_array {
+       const u32 *ptr;
+       u32 size_in_dwords;
+};
+
+/* Chip constant definitions */
+struct chip_defs {
+       const char *name;
+       struct {
+               u8 num_ports;
+               u8 num_pfs;
+       } per_platform[MAX_PLATFORM_IDS];
+};
+
+/* Platform constant definitions */
+struct platform_defs {
+       const char *name;
+       u32 delay_factor;
+};
+
+/* Storm constant definitions */
+struct storm_defs {
+       char letter;
+       enum block_id block_id;
+       enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS];
+       bool has_vfc;
+       u32 sem_fast_mem_addr;
+       u32 sem_frame_mode_addr;
+       u32 sem_slow_enable_addr;
+       u32 sem_slow_mode_addr;
+       u32 sem_slow_mode1_conf_addr;
+       u32 sem_sync_dbg_empty_addr;
+       u32 sem_slow_dbg_empty_addr;
+       u32 cm_ctx_wr_addr;
+       u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */
+       u32 cm_conn_ag_ctx_rd_addr;
+       u32 cm_conn_st_ctx_lid_size; /* In quad-regs */
+       u32 cm_conn_st_ctx_rd_addr;
+       u32 cm_task_ag_ctx_lid_size; /* In quad-regs */
+       u32 cm_task_ag_ctx_rd_addr;
+       u32 cm_task_st_ctx_lid_size; /* In quad-regs */
+       u32 cm_task_st_ctx_rd_addr;
+};
+
+/* Block constant definitions */
+struct block_defs {
+       const char *name;
+       bool has_dbg_bus[MAX_CHIP_IDS];
+       bool associated_to_storm;
+       u32 storm_id; /* Valid only if associated_to_storm is true */
+       enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS];
+       u32 dbg_select_addr;
+       u32 dbg_cycle_enable_addr;
+       u32 dbg_shift_addr;
+       u32 dbg_force_valid_addr;
+       u32 dbg_force_frame_addr;
+       bool has_reset_bit;
+       bool unreset; /* If true, the block is taken out of reset before dump */
+       enum dbg_reset_regs reset_reg;
+       u8 reset_bit_offset; /* Bit offset in reset register */
+};
+
+/* Reset register definitions */
+struct reset_reg_defs {
+       u32 addr;
+       u32 unreset_val;
+       bool exists[MAX_CHIP_IDS];
+};
+
+struct grc_param_defs {
+       u32 default_val[MAX_CHIP_IDS]; /* Default value, per chip */
+       u32 min; /* Minimum allowed value */
+       u32 max; /* Maximum allowed value */
+       bool is_preset; /* True if the param is a preset of other params */
+       u32 exclude_all_preset_val; /* Value when EXCLUDE_ALL is selected */
+       u32 crash_preset_val; /* Value when CRASH is selected */
+};
+
+struct rss_mem_defs {
+       const char *mem_name;
+       const char *type_name;
+       u32 addr; /* In 128b units */
+       u32 num_entries[MAX_CHIP_IDS];
+       u32 entry_width[MAX_CHIP_IDS]; /* In bits */
+};
+
+struct vfc_ram_defs {
+       const char *mem_name;
+       const char *type_name;
+       u32 base_row;
+       u32 num_rows;
+};
+
+struct big_ram_defs {
+       const char *instance_name;
+       enum mem_groups mem_group_id;
+       enum mem_groups ram_mem_group_id;
+       enum dbg_grc_params grc_param;
+       u32 addr_reg_addr;
+       u32 data_reg_addr;
+       u32 num_of_blocks[MAX_CHIP_IDS];
+};
+
+struct phy_defs {
+       const char *phy_name;
+       u32 base_addr;
+       u32 tbus_addr_lo_addr;
+       u32 tbus_addr_hi_addr;
+       u32 tbus_data_lo_addr;
+       u32 tbus_data_hi_addr;
+};
+
+/******************************** Constants **********************************/
+
+#define MAX_LCIDS                      320
+#define MAX_LTIDS                      320
+#define NUM_IOR_SETS                   2
+#define IORS_PER_SET                   176
+#define IOR_SET_OFFSET(set_id)         ((set_id) * 256)
+#define BYTES_IN_DWORD                 sizeof(u32)
+
+/* In the macros below, size and offset are specified in bits */
+#define CEIL_DWORDS(size)              DIV_ROUND_UP(size, 32)
+#define FIELD_BIT_OFFSET(type, field)  type ## _ ## field ## _ ## OFFSET
+#define FIELD_BIT_SIZE(type, field)    type ## _ ## field ## _ ## SIZE
+#define FIELD_DWORD_OFFSET(type, field) \
+        (int)(FIELD_BIT_OFFSET(type, field) / 32)
+#define FIELD_DWORD_SHIFT(type, field) (FIELD_BIT_OFFSET(type, field) % 32)
+#define FIELD_BIT_MASK(type, field) \
+       (((1 << FIELD_BIT_SIZE(type, field)) - 1) << \
+        FIELD_DWORD_SHIFT(type, field))
+#define SET_VAR_FIELD(var, type, field, val) \
+       do { \
+               var[FIELD_DWORD_OFFSET(type, field)] &= \
+               (~FIELD_BIT_MASK(type, field)); \
+               var[FIELD_DWORD_OFFSET(type, field)] |= \
+               (val) << FIELD_DWORD_SHIFT(type, field); \
+       } while (0)
+/* The ARR_REG_WR/ARR_REG_RD macros below rely on a loop variable named i
+ * that must be declared by the caller.
+ */
+#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \
+       do { \
+               for (i = 0; i < (arr_size); i++) \
+                       qed_wr(dev, ptt, addr, (arr)[i]); \
+       } while (0)
+#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \
+       do { \
+               for (i = 0; i < (arr_size); i++) \
+                       (arr)[i] = qed_rd(dev, ptt, addr); \
+       } while (0)
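+
+/* Worked example (illustrative only), using the VFC CAM command field
+ * defined below (VFC_CAM_CMD_ROW_OFFSET = 48, VFC_CAM_CMD_ROW_SIZE = 9):
+ * SET_VAR_FIELD(cmd, VFC_CAM_CMD, ROW, row) expands to
+ *
+ *      cmd[1] &= ~(0x1ff << 16);   // dword offset 48 / 32 = 1
+ *      cmd[1] |= (row) << 16;      // shift 48 % 32 = 16, 9-bit mask 0x1ff
+ *
+ * assuming cmd is a u32 array of VFC_CAM_CMD_DWORDS elements and row fits
+ * in 9 bits.
+ */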
+
+#define DWORDS_TO_BYTES(dwords)                ((dwords) * BYTES_IN_DWORD)
+#define BYTES_TO_DWORDS(bytes)         ((bytes) / BYTES_IN_DWORD)
+#define RAM_LINES_TO_DWORDS(lines)     ((lines) * 2)
+#define RAM_LINES_TO_BYTES(lines) \
+       DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines))
+#define REG_DUMP_LEN_SHIFT             24
+#define MEM_DUMP_ENTRY_SIZE_DWORDS \
+       BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem))
+#define IDLE_CHK_RULE_SIZE_DWORDS \
+       BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule))
+#define IDLE_CHK_RESULT_HDR_DWORDS \
+       BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr))
+#define IDLE_CHK_RESULT_REG_HDR_DWORDS \
+       BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr))
+#define IDLE_CHK_MAX_ENTRIES_SIZE      32
+
+/* The sizes and offsets below are specified in bits */
+#define VFC_CAM_CMD_STRUCT_SIZE                64
+#define VFC_CAM_CMD_ROW_OFFSET         48
+#define VFC_CAM_CMD_ROW_SIZE           9
+#define VFC_CAM_ADDR_STRUCT_SIZE       16
+#define VFC_CAM_ADDR_OP_OFFSET         0
+#define VFC_CAM_ADDR_OP_SIZE           4
+#define VFC_CAM_RESP_STRUCT_SIZE       256
+#define VFC_RAM_ADDR_STRUCT_SIZE       16
+#define VFC_RAM_ADDR_OP_OFFSET         0
+#define VFC_RAM_ADDR_OP_SIZE           2
+#define VFC_RAM_ADDR_ROW_OFFSET                2
+#define VFC_RAM_ADDR_ROW_SIZE          10
+#define VFC_RAM_RESP_STRUCT_SIZE       256
+#define VFC_CAM_CMD_DWORDS             CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE)
+#define VFC_CAM_ADDR_DWORDS            CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE)
+#define VFC_CAM_RESP_DWORDS            CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE)
+#define VFC_RAM_CMD_DWORDS             VFC_CAM_CMD_DWORDS
+#define VFC_RAM_ADDR_DWORDS            CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE)
+#define VFC_RAM_RESP_DWORDS            CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE)
+#define NUM_VFC_RAM_TYPES              4
+#define VFC_CAM_NUM_ROWS               512
+#define VFC_OPCODE_CAM_RD              14
+#define VFC_OPCODE_RAM_RD              0
+#define NUM_RSS_MEM_TYPES              5
+#define NUM_BIG_RAM_TYPES              3
+#define BIG_RAM_BLOCK_SIZE_BYTES       128
+#define BIG_RAM_BLOCK_SIZE_DWORDS \
+       BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES)
+#define NUM_PHY_TBUS_ADDRESSES         2048
+#define PHY_DUMP_SIZE_DWORDS           (NUM_PHY_TBUS_ADDRESSES / 2)
+#define RESET_REG_UNRESET_OFFSET       4
+#define STALL_DELAY_MS                 500
+#define STATIC_DEBUG_LINE_DWORDS       9
+#define NUM_DBG_BUS_LINES              256
+#define NUM_COMMON_GLOBAL_PARAMS       8
+#define FW_IMG_MAIN                    1
+#define REG_FIFO_DEPTH_ELEMENTS                32
+#define REG_FIFO_ELEMENT_DWORDS                2
+#define REG_FIFO_DEPTH_DWORDS \
+       (REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS)
+#define IGU_FIFO_DEPTH_ELEMENTS                64
+#define IGU_FIFO_ELEMENT_DWORDS                4
+#define IGU_FIFO_DEPTH_DWORDS \
+       (IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS)
+#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS     20
+#define PROTECTION_OVERRIDE_ELEMENT_DWORDS     2
+#define PROTECTION_OVERRIDE_DEPTH_DWORDS \
+       (PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \
+        PROTECTION_OVERRIDE_ELEMENT_DWORDS)
+#define MCP_SPAD_TRACE_OFFSIZE_ADDR \
+       (MCP_REG_SCRATCH + \
+        offsetof(struct static_init, sections[SPAD_SECTION_TRACE]))
+#define MCP_TRACE_META_IMAGE_SIGNATURE  0x669955aa
+#define EMPTY_FW_VERSION_STR           "???_???_???_???"
+#define EMPTY_FW_IMAGE_STR             "???????????????"
+
+/***************************** Constant Arrays *******************************/
+
+/* Debug arrays */
+static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} };
+
+/* Chip constant definitions array */
+static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = {
+       { "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } },
+       { "bb_b0",
+         { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } },
+       { "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } }
+};
+
+/* Storm constant definitions array */
+static struct storm_defs s_storm_defs[] = {
+       /* Tstorm */
+       {'T', BLOCK_TSEM,
+        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
+         DBG_BUS_CLIENT_RBCT}, true,
+        TSEM_REG_FAST_MEMORY,
+        TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE,
+        TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG,
+        TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+        TCM_REG_CTX_RBC_ACCS,
+        4, TCM_REG_AGG_CON_CTX,
+        16, TCM_REG_SM_CON_CTX,
+        2, TCM_REG_AGG_TASK_CTX,
+        4, TCM_REG_SM_TASK_CTX},
+       /* Mstorm */
+       {'M', BLOCK_MSEM,
+        {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
+         DBG_BUS_CLIENT_RBCM}, false,
+        MSEM_REG_FAST_MEMORY,
+        MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE,
+        MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG,
+        MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY,
+        MCM_REG_CTX_RBC_ACCS,
+        1, MCM_REG_AGG_CON_CTX,
+        10, MCM_REG_SM_CON_CTX,
+        2, MCM_REG_AGG_TASK_CTX,
+        7, MCM_REG_SM_TASK_CTX},
+       /* Ustorm */
+       {'U', BLOCK_USEM,
+        {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU,
+         DBG_BUS_CLIENT_RBCU}, false,
+        USEM_REG_FAST_MEMORY,
+        USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE,
+        USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG,
+        USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY,
+        UCM_REG_CTX_RBC_ACCS,
+        2, UCM_REG_AGG_CON_CTX,
+        13, UCM_REG_SM_CON_CTX,
+        3, UCM_REG_AGG_TASK_CTX,
+        3, UCM_REG_SM_TASK_CTX},
+       /* Xstorm */
+       {'X', BLOCK_XSEM,
+        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
+         DBG_BUS_CLIENT_RBCX}, false,
+        XSEM_REG_FAST_MEMORY,
+        XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE,
+        XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG,
+        XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY,
+        XCM_REG_CTX_RBC_ACCS,
+        9, XCM_REG_AGG_CON_CTX,
+        15, XCM_REG_SM_CON_CTX,
+        0, 0,
+        0, 0},
+       /* Ystorm */
+       {'Y', BLOCK_YSEM,
+        {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
+         DBG_BUS_CLIENT_RBCY}, false,
+        YSEM_REG_FAST_MEMORY,
+        YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE,
+        YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG,
+        YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+        YCM_REG_CTX_RBC_ACCS,
+        2, YCM_REG_AGG_CON_CTX,
+        3, YCM_REG_SM_CON_CTX,
+        2, YCM_REG_AGG_TASK_CTX,
+        12, YCM_REG_SM_TASK_CTX},
+       /* Pstorm */
+       {'P', BLOCK_PSEM,
+        {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS,
+         DBG_BUS_CLIENT_RBCS}, true,
+        PSEM_REG_FAST_MEMORY,
+        PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE,
+        PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG,
+        PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY,
+        PCM_REG_CTX_RBC_ACCS,
+        0, 0,
+        10, PCM_REG_SM_CON_CTX,
+        0, 0,
+        0, 0}
+};
+
+/* Block definitions array */
+static struct block_defs block_grc_defs = {
+       "grc", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+       GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE,
+       GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID,
+       GRC_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_UA, 1
+};
+
+static struct block_defs block_miscs_defs = {
+       "miscs", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_misc_defs = {
+       "misc", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_dbu_defs = {
+       "dbu", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_pglue_b_defs = {
+       "pglue_b", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH},
+       PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE,
+       PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID,
+       PGLUE_B_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 1
+};
+
+static struct block_defs block_cnig_defs = {
+       "cnig", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+       CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2,
+       CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2,
+       CNIG_REG_DBG_FORCE_FRAME_K2,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 0
+};
+
+static struct block_defs block_cpmu_defs = {
+       "cpmu", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 8
+};
+
+static struct block_defs block_ncsi_defs = {
+       "ncsi", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+       NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE,
+       NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID,
+       NCSI_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 5
+};
+
+static struct block_defs block_opte_defs = {
+       "opte", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 4
+};
+
+static struct block_defs block_bmb_defs = {
+       "bmb", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB},
+       BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE,
+       BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID,
+       BMB_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_UA, 7
+};
+
+static struct block_defs block_pcie_defs = {
+       "pcie", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
+       PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
+       PCIE_REG_DBG_COMMON_FORCE_FRAME,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_mcp_defs = {
+       "mcp", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_mcp2_defs = {
+       "mcp2", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+       MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE,
+       MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID,
+       MCP2_REG_DBG_FORCE_FRAME,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_pswhst_defs = {
+       "pswhst", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE,
+       PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID,
+       PSWHST_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 0
+};
+
+static struct block_defs block_pswhst2_defs = {
+       "pswhst2", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE,
+       PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID,
+       PSWHST2_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 0
+};
+
+static struct block_defs block_pswrd_defs = {
+       "pswrd", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE,
+       PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID,
+       PSWRD_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 2
+};
+
+static struct block_defs block_pswrd2_defs = {
+       "pswrd2", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE,
+       PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID,
+       PSWRD2_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 2
+};
+
+static struct block_defs block_pswwr_defs = {
+       "pswwr", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE,
+       PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID,
+       PSWWR_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 3
+};
+
+static struct block_defs block_pswwr2_defs = {
+       "pswwr2", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 3
+};
+
+static struct block_defs block_pswrq_defs = {
+       "pswrq", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE,
+       PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID,
+       PSWRQ_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 1
+};
+
+static struct block_defs block_pswrq2_defs = {
+       "pswrq2", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE,
+       PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID,
+       PSWRQ2_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISC_PL_HV, 1
+};
+
+static struct block_defs block_pglcs_defs = {
+       "pglcs", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE,
+       PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID,
+       PGLCS_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 2
+};
+
+static struct block_defs block_ptu_defs = {
+       "ptu", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE,
+       PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID,
+       PTU_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20
+};
+
+static struct block_defs block_dmae_defs = {
+       "dmae", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE,
+       DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID,
+       DMAE_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28
+};
+
+static struct block_defs block_tcm_defs = {
+       "tcm", {true, true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE,
+       TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID,
+       TCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5
+};
+
+static struct block_defs block_mcm_defs = {
+       "mcm", {true, true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE,
+       MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID,
+       MCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3
+};
+
+static struct block_defs block_ucm_defs = {
+       "ucm", {true, true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE,
+       UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID,
+       UCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8
+};
+
+static struct block_defs block_xcm_defs = {
+       "xcm", {true, true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE,
+       XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID,
+       XCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19
+};
+
+static struct block_defs block_ycm_defs = {
+       "ycm", {true, true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE,
+       YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID,
+       YCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5
+};
+
+static struct block_defs block_pcm_defs = {
+       "pcm", {true, true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE,
+       PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID,
+       PCM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4
+};
+
+static struct block_defs block_qm_defs = {
+       "qm", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ},
+       QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE,
+       QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID,
+       QM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16
+};
+
+static struct block_defs block_tm_defs = {
+       "tm", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE,
+       TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID,
+       TM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17
+};
+
+static struct block_defs block_dorq_defs = {
+       "dorq", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE,
+       DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID,
+       DORQ_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18
+};
+
+static struct block_defs block_brb_defs = {
+       "brb", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+       BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE,
+       BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID,
+       BRB_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0
+};
+
+static struct block_defs block_src_defs = {
+       "src", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE,
+       SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID,
+       SRC_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2
+};
+
+static struct block_defs block_prs_defs = {
+       "prs", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+       PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE,
+       PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID,
+       PRS_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1
+};
+
+static struct block_defs block_tsdm_defs = {
+       "tsdm", {true, true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE,
+       TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID,
+       TSDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3
+};
+
+static struct block_defs block_msdm_defs = {
+       "msdm", {true, true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE,
+       MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID,
+       MSDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6
+};
+
+static struct block_defs block_usdm_defs = {
+       "usdm", {true, true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE,
+       USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID,
+       USDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7
+};
+
+static struct block_defs block_xsdm_defs = {
+       "xsdm", {true, true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE,
+       XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID,
+       XSDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20
+};
+
+static struct block_defs block_ysdm_defs = {
+       "ysdm", {true, true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE,
+       YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID,
+       YSDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8
+};
+
+static struct block_defs block_psdm_defs = {
+       "psdm", {true, true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE,
+       PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID,
+       PSDM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7
+};
+
+static struct block_defs block_tsem_defs = {
+       "tsem", {true, true, true}, true, DBG_TSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE,
+       TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID,
+       TSEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4
+};
+
+static struct block_defs block_msem_defs = {
+       "msem", {true, true, true}, true, DBG_MSTORM_ID,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE,
+       MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID,
+       MSEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9
+};
+
+static struct block_defs block_usem_defs = {
+       "usem", {true, true, true}, true, DBG_USTORM_ID,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE,
+       USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID,
+       USEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9
+};
+
+static struct block_defs block_xsem_defs = {
+       "xsem", {true, true, true}, true, DBG_XSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE,
+       XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID,
+       XSEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21
+};
+
+static struct block_defs block_ysem_defs = {
+       "ysem", {true, true, true}, true, DBG_YSTORM_ID,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+       YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE,
+       YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID,
+       YSEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11
+};
+
+static struct block_defs block_psem_defs = {
+       "psem", {true, true, true}, true, DBG_PSTORM_ID,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE,
+       PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID,
+       PSEM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10
+};
+
+static struct block_defs block_rss_defs = {
+       "rss", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+       RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE,
+       RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID,
+       RSS_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18
+};
+
+static struct block_defs block_tmld_defs = {
+       "tmld", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE,
+       TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID,
+       TMLD_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13
+};
+
+static struct block_defs block_muld_defs = {
+       "muld", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE,
+       MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID,
+       MULD_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14
+};
+
+static struct block_defs block_yuld_defs = {
+       "yuld", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+       YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE,
+       YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID,
+       YULD_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15
+};
+
+static struct block_defs block_xyld_defs = {
+       "xyld", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+       XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE,
+       XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID,
+       XYLD_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12
+};
+
+static struct block_defs block_prm_defs = {
+       "prm", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE,
+       PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID,
+       PRM_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21
+};
+
+static struct block_defs block_pbf_pb1_defs = {
+       "pbf_pb1", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE,
+       PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID,
+       PBF_PB1_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+       11
+};
+
+static struct block_defs block_pbf_pb2_defs = {
+       "pbf_pb2", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE,
+       PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID,
+       PBF_PB2_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+       12
+};
+
+static struct block_defs block_rpb_defs = {
+       "rpb", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE,
+       RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID,
+       RPB_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13
+};
+
+static struct block_defs block_btb_defs = {
+       "btb", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV},
+       BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE,
+       BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID,
+       BTB_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10
+};
+
+static struct block_defs block_pbf_defs = {
+       "pbf", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+       PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE,
+       PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID,
+       PBF_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15
+};
+
+static struct block_defs block_rdif_defs = {
+       "rdif", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+       RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE,
+       RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID,
+       RDIF_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16
+};
+
+static struct block_defs block_tdif_defs = {
+       "tdif", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+       TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE,
+       TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID,
+       TDIF_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17
+};
+
+static struct block_defs block_cdu_defs = {
+       "cdu", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE,
+       CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID,
+       CDU_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23
+};
+
+static struct block_defs block_ccfc_defs = {
+       "ccfc", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE,
+       CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID,
+       CCFC_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24
+};
+
+static struct block_defs block_tcfc_defs = {
+       "tcfc", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+       TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE,
+       TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID,
+       TCFC_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25
+};
+
+static struct block_defs block_igu_defs = {
+       "igu", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE,
+       IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID,
+       IGU_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27
+};
+
+static struct block_defs block_cau_defs = {
+       "cau", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+       CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE,
+       CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID,
+       CAU_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19
+};
+
+static struct block_defs block_umac_defs = {
+       "umac", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+       UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE,
+       UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID,
+       UMAC_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 6
+};
+
+static struct block_defs block_xmac_defs = {
+       "xmac", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_dbg_defs = {
+       "dbg", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3
+};
+
+static struct block_defs block_nig_defs = {
+       "nig", {true, true, true}, false, 0,
+       {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+       NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE,
+       NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID,
+       NIG_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0
+};
+
+static struct block_defs block_wol_defs = {
+       "wol", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+       WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE,
+       WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID,
+       WOL_REG_DBG_FORCE_FRAME,
+       true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7
+};
+
+static struct block_defs block_bmbn_defs = {
+       "bmbn", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB},
+       BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE,
+       BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID,
+       BMBN_REG_DBG_FORCE_FRAME,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_ipc_defs = {
+       "ipc", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_UA, 8
+};
+
+static struct block_defs block_nwm_defs = {
+       "nwm", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+       NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE,
+       NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID,
+       NWM_REG_DBG_FORCE_FRAME,
+       true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0
+};
+
+static struct block_defs block_nws_defs = {
+       "nws", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 12
+};
+
+static struct block_defs block_ms_defs = {
+       "ms", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, false, DBG_RESET_REG_MISCS_PL_HV, 13
+};
+
+static struct block_defs block_phy_pcie_defs = {
+       "phy_pcie", {false, false, true}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+       PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
+       PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
+       PCIE_REG_DBG_COMMON_FORCE_FRAME,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_led_defs = {
+       "led", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       true, true, DBG_RESET_REG_MISCS_PL_HV, 14
+};
+
+static struct block_defs block_misc_aeu_defs = {
+       "misc_aeu", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_bar0_map_defs = {
+       "bar0_map", {false, false, false}, false, 0,
+       {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+       0, 0, 0, 0, 0,
+       false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
+       &block_grc_defs,
+       &block_miscs_defs,
+       &block_misc_defs,
+       &block_dbu_defs,
+       &block_pglue_b_defs,
+       &block_cnig_defs,
+       &block_cpmu_defs,
+       &block_ncsi_defs,
+       &block_opte_defs,
+       &block_bmb_defs,
+       &block_pcie_defs,
+       &block_mcp_defs,
+       &block_mcp2_defs,
+       &block_pswhst_defs,
+       &block_pswhst2_defs,
+       &block_pswrd_defs,
+       &block_pswrd2_defs,
+       &block_pswwr_defs,
+       &block_pswwr2_defs,
+       &block_pswrq_defs,
+       &block_pswrq2_defs,
+       &block_pglcs_defs,
+       &block_dmae_defs,
+       &block_ptu_defs,
+       &block_tcm_defs,
+       &block_mcm_defs,
+       &block_ucm_defs,
+       &block_xcm_defs,
+       &block_ycm_defs,
+       &block_pcm_defs,
+       &block_qm_defs,
+       &block_tm_defs,
+       &block_dorq_defs,
+       &block_brb_defs,
+       &block_src_defs,
+       &block_prs_defs,
+       &block_tsdm_defs,
+       &block_msdm_defs,
+       &block_usdm_defs,
+       &block_xsdm_defs,
+       &block_ysdm_defs,
+       &block_psdm_defs,
+       &block_tsem_defs,
+       &block_msem_defs,
+       &block_usem_defs,
+       &block_xsem_defs,
+       &block_ysem_defs,
+       &block_psem_defs,
+       &block_rss_defs,
+       &block_tmld_defs,
+       &block_muld_defs,
+       &block_yuld_defs,
+       &block_xyld_defs,
+       &block_prm_defs,
+       &block_pbf_pb1_defs,
+       &block_pbf_pb2_defs,
+       &block_rpb_defs,
+       &block_btb_defs,
+       &block_pbf_defs,
+       &block_rdif_defs,
+       &block_tdif_defs,
+       &block_cdu_defs,
+       &block_ccfc_defs,
+       &block_tcfc_defs,
+       &block_igu_defs,
+       &block_cau_defs,
+       &block_umac_defs,
+       &block_xmac_defs,
+       &block_dbg_defs,
+       &block_nig_defs,
+       &block_wol_defs,
+       &block_bmbn_defs,
+       &block_ipc_defs,
+       &block_nwm_defs,
+       &block_nws_defs,
+       &block_ms_defs,
+       &block_phy_pcie_defs,
+       &block_led_defs,
+       &block_misc_aeu_defs,
+       &block_bar0_map_defs,
+};
+
+static struct platform_defs s_platform_defs[] = {
+       {"asic", 1},
+       {"reserved", 0},
+       {"reserved2", 0},
+       {"reserved3", 0}
+};
+
+static struct grc_param_defs s_grc_param_defs[] = {
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_TSTORM */
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_MSTORM */
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_USTORM */
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_XSTORM */
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_YSTORM */
+       {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_PSTORM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_REGS */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RAM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PBUF */
+       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IOR */
+       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_VFC */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM_CTX */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_ILT */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RSS */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CAU */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_QM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MCP */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_RESERVED */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CFC */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IGU */
+       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BRB */
+       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BTB */
+       {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BMB */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_NIG */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MULD */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PRS */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DMAE */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_TM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_SDM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DIF */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_STATIC */
+       {{0, 0, 0}, 0, 1, false, 0, 0}, /* DBG_GRC_PARAM_UNSTALL */
+       {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
+        MAX_LCIDS},                    /* DBG_GRC_PARAM_NUM_LCIDS */
+       {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
+        MAX_LTIDS},                    /* DBG_GRC_PARAM_NUM_LTIDS */
+       {{0, 0, 0}, 0, 1, true, 0, 0},  /* DBG_GRC_PARAM_EXCLUDE_ALL */
+       {{0, 0, 0}, 0, 1, true, 0, 0},  /* DBG_GRC_PARAM_CRASH */
+       {{0, 0, 0}, 0, 1, false, 1, 0}, /* DBG_GRC_PARAM_PARITY_SAFE */
+       {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM */
+       {{1, 1, 1}, 0, 1, false, 0, 1}  /* DBG_GRC_PARAM_DUMP_PHY */
+};
+
+static struct rss_mem_defs s_rss_mem_defs[] = {
+       { "rss_mem_cid", "rss_cid", 0,
+         {256, 256, 320},
+         {32, 32, 32} },
+       { "rss_mem_key_msb", "rss_key", 1024,
+         {128, 128, 208},
+         {256, 256, 256} },
+       { "rss_mem_key_lsb", "rss_key", 2048,
+         {128, 128, 208},
+         {64, 64, 64} },
+       { "rss_mem_info", "rss_info", 3072,
+         {128, 128, 208},
+         {16, 16, 16} },
+       { "rss_mem_ind", "rss_ind", 4096,
+         {(128 * 128), (128 * 128), (128 * 208)},
+         {16, 16, 16} }
+};
+
+static struct vfc_ram_defs s_vfc_ram_defs[] = {
+       {"vfc_ram_tt1", "vfc_ram", 0, 512},
+       {"vfc_ram_mtt2", "vfc_ram", 512, 128},
+       {"vfc_ram_stt2", "vfc_ram", 640, 32},
+       {"vfc_ram_ro_vect", "vfc_ram", 672, 32}
+};
+
+static struct big_ram_defs s_big_ram_defs[] = {
+       { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB,
+         BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA,
+         {4800, 4800, 5632} },
+       { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB,
+         BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA,
+         {2880, 2880, 3680} },
+       { "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB,
+         BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA,
+         {1152, 1152, 1152} }
+};
+
+static struct reset_reg_defs s_reset_regs_defs[] = {
+       { MISCS_REG_RESET_PL_UA, 0x0,
+         {true, true, true} },         /* DBG_RESET_REG_MISCS_PL_UA */
+       { MISCS_REG_RESET_PL_HV, 0x0,
+         {true, true, true} },         /* DBG_RESET_REG_MISCS_PL_HV */
+       { MISCS_REG_RESET_PL_HV_2, 0x0,
+         {false, false, true} },       /* DBG_RESET_REG_MISCS_PL_HV_2 */
+       { MISC_REG_RESET_PL_UA, 0x0,
+         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_UA */
+       { MISC_REG_RESET_PL_HV, 0x0,
+         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_HV */
+       { MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040,
+         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
+       { MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007,
+         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
+       { MISC_REG_RESET_PL_PDA_VAUX, 0x2,
+         {true, true, true} },         /* DBG_RESET_REG_MISC_PL_PDA_VAUX */
+};
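+
+/* Illustrative sketch of how these entries are meant to be used: a block is
+ * taken out of reset by writing its unreset value to the register that
+ * follows the reset register (see RESET_REG_UNRESET_OFFSET above):
+ *
+ *      qed_wr(p_hwfn, p_ptt,
+ *             s_reset_regs_defs[reg_id].addr + RESET_REG_UNRESET_OFFSET,
+ *             reset_val | s_reset_regs_defs[reg_id].unreset_val);
+ *
+ * reg_id and reset_val are hypothetical names used for this sketch.
+ */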
+
+static struct phy_defs s_phy_defs[] = {
+       {"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0,
+        PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8,
+        PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0,
+        PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8},
+       {"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132,
+        PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133,
+        PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130,
+        PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131},
+       {"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
+       {"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
+        PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
+};
+
+/**************************** Private Functions ******************************/
+
+/* Reads and returns a single dword from the specified unaligned buffer */
+static u32 qed_read_unaligned_dword(u8 *buf)
+{
+       u32 dword;
+
+       memcpy((u8 *)&dword, buf, sizeof(dword));
+       return dword;
+}
+
+/* Initializes debug data for the specified device */
+static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+       if (dev_data->initialized)
+               return DBG_STATUS_OK;
+
+       if (QED_IS_K2(p_hwfn->cdev)) {
+               dev_data->chip_id = CHIP_K2;
+               dev_data->mode_enable[MODE_K2] = 1;
+       } else if (QED_IS_BB_B0(p_hwfn->cdev)) {
+               dev_data->chip_id = CHIP_BB_B0;
+               dev_data->mode_enable[MODE_BB_B0] = 1;
+       } else {
+               return DBG_STATUS_UNKNOWN_CHIP;
+       }
+
+       dev_data->platform_id = PLATFORM_ASIC;
+       dev_data->mode_enable[MODE_ASIC] = 1;
+       dev_data->initialized = true;
+       return DBG_STATUS_OK;
+}
+
+/* Reads the FW info structure for the specified Storm from the chip,
+ * and writes it to the specified fw_info pointer.
+ */
+static void qed_read_fw_info(struct qed_hwfn *p_hwfn,
+                            struct qed_ptt *p_ptt,
+                            u8 storm_id, struct fw_info *fw_info)
+{
+       /* Read first the address that points to fw_info location.
+        * The address is located in the last line of the Storm RAM.
+        */
+       u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr +
+                  SEM_FAST_REG_INT_RAM +
+                  DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) -
+                  sizeof(struct fw_info_location);
+       struct fw_info_location fw_info_location;
+       u32 *dest = (u32 *)&fw_info_location;
+       u32 i;
+
+       memset(&fw_info_location, 0, sizeof(fw_info_location));
+       memset(fw_info, 0, sizeof(*fw_info));
+       for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location));
+            i++, addr += BYTES_IN_DWORD)
+               dest[i] = qed_rd(p_hwfn, p_ptt, addr);
+       if (fw_info_location.size > 0 && fw_info_location.size <=
+           sizeof(*fw_info)) {
+               /* Read FW version info from Storm RAM */
+               addr = fw_info_location.grc_addr;
+               dest = (u32 *)fw_info;
+               for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size);
+                    i++, addr += BYTES_IN_DWORD)
+                       dest[i] = qed_rd(p_hwfn, p_ptt, addr);
+       }
+}
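+
+/* Layout assumed by the function above (illustrative): the last line of the
+ * Storm's INT_RAM holds a struct fw_info_location, whose grc_addr and size
+ * fields locate the struct fw_info that gets copied out (typically earlier
+ * in the same RAM):
+ *
+ *      [ ... fw_info ... | ... | fw_info_location ]  <- end of INT_RAM
+ *              ^                       |
+ *              +--- grc_addr / size ---+
+ */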
+
+/* Dumps the specified string to the specified buffer. Returns the dumped size
+ * in bytes (string length + 1 for the terminating null character).
+ */
+static u32 qed_dump_str(char *dump_buf, bool dump, const char *str)
+{
+       if (dump)
+               strcpy(dump_buf, str);
+       return (u32)strlen(str) + 1;
+}
+
+/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size
+ * in bytes, e.g. 3 for a byte offset of 5, or 0 if the offset is already
+ * dword-aligned.
+ */
+static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset)
+{
+       u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size;
+
+       align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0;
+
+       if (dump && align_size)
+               memset(dump_buf, 0, align_size);
+       return align_size;
+}
+
+/* Writes the specified string param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_str_param(u32 *dump_buf,
+                             bool dump,
+                             const char *param_name, const char *param_val)
+{
+       char *char_buf = (char *)dump_buf;
+       u32 offset = 0;
+
+       /* Dump param name */
+       offset += qed_dump_str(char_buf + offset, dump, param_name);
+
+       /* Indicate a string param value */
+       if (dump)
+               *(char_buf + offset) = 1;
+       offset++;
+
+       /* Dump param value */
+       offset += qed_dump_str(char_buf + offset, dump, param_val);
+
+       /* Align buffer to next dword */
+       offset += qed_dump_align(char_buf + offset, dump, offset);
+       return BYTES_TO_DWORDS(offset);
+}
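+
+/* Resulting layout (derived from the code above), e.g. for the param
+ * ("platform", "asic"):
+ *
+ *      "platform\0" (9) + 0x01 type byte (1) + "asic\0" (5) + 1 pad byte
+ *
+ * 15 bytes of data padded to 16, i.e. a returned size of 4 dwords. The 0x01
+ * type byte marks the value as a string.
+ */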
+
+/* Writes the specified numeric param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_num_param(u32 *dump_buf,
+                             bool dump, const char *param_name, u32 param_val)
+{
+       char *char_buf = (char *)dump_buf;
+       u32 offset = 0;
+
+       /* Dump param name */
+       offset += qed_dump_str(char_buf + offset, dump, param_name);
+
+       /* Indicate a numeric param value */
+       if (dump)
+               *(char_buf + offset) = 0;
+       offset++;
+
+       /* Align buffer to next dword */
+       offset += qed_dump_align(char_buf + offset, dump, offset);
+
+       /* Dump param value (and change offset from bytes to dwords) */
+       offset = BYTES_TO_DWORDS(offset);
+       if (dump)
+               *(dump_buf + offset) = param_val;
+       offset++;
+       return offset;
+}
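+
+/* Resulting layout (derived from the code above), e.g. for the param
+ * ("pci-func", 0):
+ *
+ *      "pci-func\0" (9) + 0x00 type byte (1) + 2 pad bytes + u32 value (4)
+ *
+ * 16 bytes in total, i.e. a returned size of 4 dwords. The 0x00 type byte
+ * marks the value as numeric.
+ */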
+
+/* Reads the FW version and writes it as a param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt,
+                                u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       char fw_ver_str[16] = EMPTY_FW_VERSION_STR;
+       char fw_img_str[16] = EMPTY_FW_IMAGE_STR;
+       struct fw_info fw_info = { {0}, {0} };
+       int printed_chars;
+       u32 offset = 0;
+
+       if (dump) {
+               /* Read FW image/version from PRAM in a non-reset SEMI */
+               bool found = false;
+               u8 storm_id;
+
+               for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found;
+                    storm_id++) {
+                       /* Read FW version/image */
+                       if (!dev_data->block_in_reset
+                           [s_storm_defs[storm_id].block_id]) {
+                               /* read FW info for the current Storm */
+                               qed_read_fw_info(p_hwfn,
+                                                p_ptt, storm_id, &fw_info);
+
+                               /* Create FW version/image strings */
+                               printed_chars =
+                                   snprintf(fw_ver_str,
+                                            sizeof(fw_ver_str),
+                                            "%d_%d_%d_%d",
+                                            fw_info.ver.num.major,
+                                            fw_info.ver.num.minor,
+                                            fw_info.ver.num.rev,
+                                            fw_info.ver.num.eng);
+                               if (printed_chars < 0 || printed_chars >=
+                                   sizeof(fw_ver_str))
+                                       DP_NOTICE(p_hwfn,
+                                                 "Unexpected debug error: invalid FW version string\n");
+                               switch (fw_info.ver.image_id) {
+                               case FW_IMG_MAIN:
+                                       strcpy(fw_img_str, "main");
+                                       break;
+                               default:
+                                       strcpy(fw_img_str, "unknown");
+                                       break;
+                               }
+
+                               found = true;
+                       }
+               }
+       }
+
+       /* Dump FW version, image and timestamp */
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "fw-version", fw_ver_str);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "fw-image", fw_img_str);
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump,
+                                    "fw-timestamp", fw_info.ver.timestamp);
+       return offset;
+}
+
+/* Reads the MFW version and writes it as a param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 u32 *dump_buf, bool dump)
+{
+       char mfw_ver_str[16] = EMPTY_FW_VERSION_STR;
+
+       if (dump) {
+               u32 global_section_offsize, global_section_addr, mfw_ver;
+               u32 public_data_addr, global_section_offsize_addr;
+               int printed_chars;
+
+               /* Find MCP public data GRC address.
+                * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug.
+                */
+               public_data_addr = qed_rd(p_hwfn, p_ptt,
+                                         MISC_REG_SHARED_MEM_ADDR) |
+                                         MCP_REG_SCRATCH;
+
+               /* Find MCP public global section offset */
+               global_section_offsize_addr = public_data_addr +
+                                             offsetof(struct mcp_public_data,
+                                                      sections) +
+                                             sizeof(offsize_t) * PUBLIC_GLOBAL;
+               global_section_offsize = qed_rd(p_hwfn, p_ptt,
+                                               global_section_offsize_addr);
+               global_section_addr = MCP_REG_SCRATCH +
+                                     (global_section_offsize &
+                                      OFFSIZE_OFFSET_MASK) * 4;
+
+               /* Read MFW version from MCP public global section */
+               mfw_ver = qed_rd(p_hwfn, p_ptt,
+                                global_section_addr +
+                                offsetof(struct public_global, mfw_ver));
+
+               /* Dump MFW version param */
+               printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str),
+                                        "%d_%d_%d_%d",
+                                        (u8) (mfw_ver >> 24),
+                                        (u8) (mfw_ver >> 16),
+                                        (u8) (mfw_ver >> 8),
+                                        (u8) mfw_ver);
+               if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str))
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpected debug error: invalid MFW version string\n");
+       }
+
+       return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str);
+}
+
+/* Writes a section header to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_section_hdr(u32 *dump_buf,
+                               bool dump, const char *name, u32 num_params)
+{
+       return qed_dump_num_param(dump_buf, dump, name, num_params);
+}
+
+/* Writes the common global params to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn,
+                                        struct qed_ptt *p_ptt,
+                                        u32 *dump_buf,
+                                        bool dump,
+                                        u8 num_specific_global_params)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 offset = 0;
+
+       /* Find platform string and dump global params section header */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump,
+                                      "global_params",
+                                      NUM_COMMON_GLOBAL_PARAMS +
+                                      num_specific_global_params);
+
+       /* Store params */
+       offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump);
+       offset += qed_dump_mfw_ver_param(p_hwfn,
+                                        p_ptt, dump_buf + offset, dump);
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump, "tools-version", TOOLS_VERSION);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump,
+                                    "chip",
+                                    s_chip_defs[dev_data->chip_id].name);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump,
+                                    "platform",
+                                    s_platform_defs[dev_data->platform_id].
+                                    name);
+       offset +=
+           qed_dump_num_param(dump_buf + offset, dump, "pci-func",
+                              p_hwfn->abs_pf_id);
+       return offset;
+}
+
+/* Writes the last section to the specified buffer at the given offset.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump)
+{
+       u32 start_offset = offset, crc = ~0;
+
+       /* Dump CRC section header */
+       offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
+
+       /* Calculate CRC32 and add it to the dword following the "last" section.
+        */
+       if (dump)
+               *(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf,
+                                             DWORDS_TO_BYTES(offset));
+       offset++;
+       return offset - start_offset;
+}
+
+/* Update blocks reset state */
+static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn,
+                                         struct qed_ptt *p_ptt)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 reg_val[MAX_DBG_RESET_REGS] = { 0 };
+       u32 i;
+
+       /* Read reset registers */
+       for (i = 0; i < MAX_DBG_RESET_REGS; i++)
+               if (s_reset_regs_defs[i].exists[dev_data->chip_id])
+                       reg_val[i] = qed_rd(p_hwfn,
+                                           p_ptt, s_reset_regs_defs[i].addr);
+
+       /* Check if blocks are in reset */
+       for (i = 0; i < MAX_BLOCK_ID; i++)
+               dev_data->block_in_reset[i] =
+                   s_block_defs[i]->has_reset_bit &&
+                   !(reg_val[s_block_defs[i]->reset_reg] &
+                     BIT(s_block_defs[i]->reset_bit_offset));
+}
+
+/* Enable / disable the Debug block */
+static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt, bool enable)
+{
+       qed_wr(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON, enable ? 1 : 0);
+}
+
+/* Resets the Debug block */
+static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt)
+{
+       u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val;
+
+       dbg_reset_reg_addr =
+               s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr;
+       old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr);
+       new_reset_reg_val = old_reset_reg_val &
+                           ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset);
+
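+
+       /* Clearing the reset bit puts the DBG block in reset; restoring the
+        * original register value takes it back out.
+        */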
+       qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val);
+       qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val);
+}
+
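+
+/* Sets the Debug Bus framing mode */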
+static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    enum dbg_bus_frame_modes mode)
+{
+       qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode);
+}
+
+/* Enable / disable Debug Bus clients according to the specified mask.
+ * (1 = enable, 0 = disable)
+ */
+static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt, u32 client_mask)
+{
+       qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask);
+}
+
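+
+/* Recursively evaluates a mode tree expression. NOT/OR/AND nodes operate on
+ * the sub-expressions that follow them; any other value is a leaf that
+ * indexes the enabled-modes array (offset by MAX_INIT_MODE_OPS).
+ */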
+static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset)
+{
+       const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr;
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++];
+       bool arg1, arg2;
+
+       switch (tree_val) {
+       case INIT_MODE_OP_NOT:
+               return !qed_is_mode_match(p_hwfn, modes_buf_offset);
+       case INIT_MODE_OP_OR:
+       case INIT_MODE_OP_AND:
+               arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset);
+               arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset);
+               return (tree_val == INIT_MODE_OP_OR) ? (arg1 ||
+                                                       arg2) : (arg1 && arg2);
+       default:
+               return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0;
+       }
+}
+
+/* Returns the value of the specified GRC param */
+static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
+                            enum dbg_grc_params grc_param)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+       return dev_data->grc.param_val[grc_param];
+}
+
+/* Clear all GRC params */
+static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 i;
+
+       for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+               dev_data->grc.param_set_by_user[i] = 0;
+}
+
+/* Assign default GRC param values */
+static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 i;
+
+       for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+               if (!dev_data->grc.param_set_by_user[i])
+                       dev_data->grc.param_val[i] =
+                           s_grc_param_defs[i].default_val[dev_data->chip_id];
+}
+
+/* Returns true if the specified entity (indicated by GRC param) should be
+ * included in the dump, false otherwise.
+ */
+static bool qed_grc_is_included(struct qed_hwfn *p_hwfn,
+                               enum dbg_grc_params grc_param)
+{
+       return qed_grc_get_param(p_hwfn, grc_param) > 0;
+}
+
+/* Returns true if the specified Storm should be included in the dump, false
+ * otherwise.
+ */
+static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn,
+                                     enum dbg_storms storm)
+{
+       return qed_grc_get_param(p_hwfn, (enum dbg_grc_params)storm) > 0;
+}
+
+/* Returns true if the specified memory should be included in the dump, false
+ * otherwise.
+ */
+static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn,
+                                   enum block_id block_id, u8 mem_group_id)
+{
+       u8 i;
+
+       /* Check Storm match */
+       if (s_block_defs[block_id]->associated_to_storm &&
+           !qed_grc_is_storm_included(p_hwfn,
+                       (enum dbg_storms)s_block_defs[block_id]->storm_id))
+               return false;
+
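+
+       /* Big RAM memories are controlled by dedicated GRC params */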
+       for (i = 0; i < NUM_BIG_RAM_TYPES; i++)
+               if (mem_group_id == s_big_ram_defs[i].mem_group_id ||
+                   mem_group_id == s_big_ram_defs[i].ram_mem_group_id)
+                       return qed_grc_is_included(p_hwfn,
+                                                  s_big_ram_defs[i].grc_param);
+       if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id ==
+           MEM_GROUP_PXP_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP);
+       if (mem_group_id == MEM_GROUP_RAM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM);
+       if (mem_group_id == MEM_GROUP_PBUF)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF);
+       if (mem_group_id == MEM_GROUP_CAU_MEM ||
+           mem_group_id == MEM_GROUP_CAU_SB ||
+           mem_group_id == MEM_GROUP_CAU_PI)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU);
+       if (mem_group_id == MEM_GROUP_QM_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM);
+       if (mem_group_id == MEM_GROUP_CONN_CFC_MEM ||
+           mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC);
+       if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id ==
+           MEM_GROUP_IGU_MSIX)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU);
+       if (mem_group_id == MEM_GROUP_MULD_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD);
+       if (mem_group_id == MEM_GROUP_PRS_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS);
+       if (mem_group_id == MEM_GROUP_DMAE_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE);
+       if (mem_group_id == MEM_GROUP_TM_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM);
+       if (mem_group_id == MEM_GROUP_SDM_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM);
+       if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id ==
+           MEM_GROUP_RDIF_CTX)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF);
+       if (mem_group_id == MEM_GROUP_CM_MEM)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM);
+       if (mem_group_id == MEM_GROUP_IOR)
+               return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR);
+
+       return true;
+}
+
+/* Stalls or unstalls all Storms, based on the stall argument */
+static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt, bool stall)
+{
+       u8 reg_val = stall ? 1 : 0;
+       u8 storm_id;
+
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               if (qed_grc_is_storm_included(p_hwfn,
+                                             (enum dbg_storms)storm_id)) {
+                       u32 reg_addr =
+                           s_storm_defs[storm_id].sem_fast_mem_addr +
+                           SEM_FAST_REG_STALL_0;
+
+                       qed_wr(p_hwfn, p_ptt, reg_addr, reg_val);
+               }
+       }
+
+       msleep(STALL_DELAY_MS);
+}
+
+/* Takes all blocks out of reset */
+static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 reg_val[MAX_DBG_RESET_REGS] = { 0 };
+       u32 i;
+
+       /* Fill reset regs values */
+       for (i = 0; i < MAX_BLOCK_ID; i++)
+               if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset)
+                       reg_val[s_block_defs[i]->reset_reg] |=
+                           BIT(s_block_defs[i]->reset_bit_offset);
+
+       /* Write reset registers */
+       for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
+               if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
+                       reg_val[i] |= s_reset_regs_defs[i].unreset_val;
+                       if (reg_val[i])
+                               qed_wr(p_hwfn,
+                                      p_ptt,
+                                      s_reset_regs_defs[i].addr +
+                                      RESET_REG_UNRESET_OFFSET, reg_val[i]);
+               }
+       }
+}
+
+/* Returns the attention name offsets of the specified block */
+static const struct dbg_attn_block_type_data *
+qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type)
+{
+       const struct dbg_attn_block *base_attn_block_arr =
+               (const struct dbg_attn_block *)
+               s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr;
+
+       return &base_attn_block_arr[block_id].per_type_data[attn_type];
+}
+
+/* Returns the attention registers of the specified block */
+static const struct dbg_attn_reg *
+qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type,
+                       u8 *num_attn_regs)
+{
+       const struct dbg_attn_block_type_data *block_type_data =
+               qed_get_block_attn_data(block_id, attn_type);
+
+       *num_attn_regs = block_type_data->num_regs;
+       return &((const struct dbg_attn_reg *)
+                s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data->
+                                                         regs_offset];
+}
+
+/* For each block, clear the status of all parities */
+static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u8 reg_idx, num_attn_regs;
+       u32 block_id;
+
+       for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+               const struct dbg_attn_reg *attn_reg_arr;
+
+               if (dev_data->block_in_reset[block_id])
+                       continue;
+
+               attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
+                                                      ATTN_TYPE_PARITY,
+                                                      &num_attn_regs);
+               for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
+                       const struct dbg_attn_reg *reg_data =
+                               &attn_reg_arr[reg_idx];
+
+                       /* Check mode */
+                       bool eval_mode = GET_FIELD(reg_data->mode.data,
+                                                  DBG_MODE_HDR_EVAL_MODE) > 0;
+                       u16 modes_buf_offset =
+                               GET_FIELD(reg_data->mode.data,
+                                         DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+                       if (!eval_mode ||
+                           qed_is_mode_match(p_hwfn, &modes_buf_offset))
+                               /* Mode match - read parity status read-clear
+                                * register.
+                                */
+                               qed_rd(p_hwfn, p_ptt,
+                                      DWORDS_TO_BYTES(reg_data->
+                                                      sts_clr_address));
+               }
+       }
+}
+
+/* Dumps GRC registers section header. Returns the dumped size in dwords.
+ * The following parameters are dumped:
+ * - 'count' = num_dumped_entries
+ * - 'split' = split_type
+ * - 'id' = split_id (dumped only if split_id >= 0)
+ * - 'param_name' = param_val (user param, dumped only if param_name != NULL and
+ *     param_val != NULL)
+ */
+static u32 qed_grc_dump_regs_hdr(u32 *dump_buf,
+                                bool dump,
+                                u32 num_reg_entries,
+                                const char *split_type,
+                                int split_id,
+                                const char *param_name, const char *param_val)
+{
+       u8 num_params = 2 + (split_id >= 0 ? 1 : 0) + (param_name ? 1 : 0);
+       u32 offset = 0;
+
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "grc_regs", num_params);
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump, "count", num_reg_entries);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "split", split_type);
+       if (split_id >= 0)
+               offset += qed_dump_num_param(dump_buf + offset,
+                                            dump, "id", split_id);
+       if (param_name && param_val)
+               offset += qed_dump_str_param(dump_buf + offset,
+                                            dump, param_name, param_val);
+       return offset;
+}
+
+/* Dumps GRC register/memory. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt, u32 *dump_buf,
+                                 bool dump, u32 addr, u32 len)
+{
+       u32 offset = 0, i;
+
+       if (dump) {
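+               /* The first dword encodes the GRC address and the data
+                * length (in dwords); the register values follow.
+                */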
+               *(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT);
+               for (i = 0; i < len; i++, addr++, offset++)
+                       *(dump_buf + offset) = qed_rd(p_hwfn,
+                                                     p_ptt,
+                                                     DWORDS_TO_BYTES(addr));
+       } else {
+               offset += len + 1;
+       }
+
+       return offset;
+}
+
+/* Dumps GRC registers entries. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    struct dbg_array input_regs_arr,
+                                    u32 *dump_buf,
+                                    bool dump,
+                                    bool block_enable[MAX_BLOCK_ID],
+                                    u32 *num_dumped_reg_entries)
+{
+       u32 i, offset = 0, input_offset = 0;
+       bool mode_match = true;
+
+       *num_dumped_reg_entries = 0;
+       while (input_offset < input_regs_arr.size_in_dwords) {
+               const struct dbg_dump_cond_hdr *cond_hdr =
+                   (const struct dbg_dump_cond_hdr *)
+                   &input_regs_arr.ptr[input_offset++];
+               bool eval_mode = GET_FIELD(cond_hdr->mode.data,
+                                          DBG_MODE_HDR_EVAL_MODE) > 0;
+
+               /* Check mode/block */
+               if (eval_mode) {
+                       u16 modes_buf_offset =
+                               GET_FIELD(cond_hdr->mode.data,
+                                         DBG_MODE_HDR_MODES_BUF_OFFSET);
+                       mode_match = qed_is_mode_match(p_hwfn,
+                                                      &modes_buf_offset);
+               }
+
+               if (mode_match && block_enable[cond_hdr->block_id]) {
+                       for (i = 0; i < cond_hdr->data_size;
+                            i++, input_offset++) {
+                               const struct dbg_dump_reg *reg =
+                                   (const struct dbg_dump_reg *)
+                                   &input_regs_arr.ptr[input_offset];
+
+                               offset +=
+                                       qed_grc_dump_reg_entry(p_hwfn, p_ptt,
+                                                   dump_buf + offset, dump,
+                                                   GET_FIELD(reg->data,
+                                                       DBG_DUMP_REG_ADDRESS),
+                                                   GET_FIELD(reg->data,
+                                                       DBG_DUMP_REG_LENGTH));
+                               (*num_dumped_reg_entries)++;
+                       }
+               } else {
+                       input_offset += cond_hdr->data_size;
+               }
+       }
+
+       return offset;
+}
+
+/* Dumps the GRC registers (header and entries) for a single split.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt,
+                                  struct dbg_array input_regs_arr,
+                                  u32 *dump_buf,
+                                  bool dump,
+                                  bool block_enable[MAX_BLOCK_ID],
+                                  const char *split_type_name,
+                                  u32 split_id,
+                                  const char *param_name,
+                                  const char *param_val)
+{
+       u32 num_dumped_reg_entries, offset;
+
+       /* Calculate register dump header size (and skip it for now) */
+       offset = qed_grc_dump_regs_hdr(dump_buf,
+                                      false,
+                                      0,
+                                      split_type_name,
+                                      split_id, param_name, param_val);
+
+       /* Dump registers */
+       offset += qed_grc_dump_regs_entries(p_hwfn,
+                                           p_ptt,
+                                           input_regs_arr,
+                                           dump_buf + offset,
+                                           dump,
+                                           block_enable,
+                                           &num_dumped_reg_entries);
+
+       /* Write register dump header */
+       if (dump && num_dumped_reg_entries > 0)
+               qed_grc_dump_regs_hdr(dump_buf,
+                                     dump,
+                                     num_dumped_reg_entries,
+                                     split_type_name,
+                                     split_id, param_name, param_val);
+
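+       /* Omit the section entirely if no register entries were dumped */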
+       return num_dumped_reg_entries > 0 ? offset : 0;
+}
+
+/* Dumps registers according to the input registers array.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 u32 *dump_buf,
+                                 bool dump,
+                                 bool block_enable[MAX_BLOCK_ID],
+                                 const char *param_name, const char *param_val)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 offset = 0, input_offset = 0;
+       u8 port_id, pf_id;
+
+       if (dump)
+               DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n");
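+
+       /* Each input section starts with a split header followed by data */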
+       while (input_offset <
+              s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) {
+               const struct dbg_dump_split_hdr *split_hdr =
+                       (const struct dbg_dump_split_hdr *)
+                       &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++];
+               u8 split_type_id = GET_FIELD(split_hdr->hdr,
+                                            DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+               u32 split_data_size = GET_FIELD(split_hdr->hdr,
+                                               DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+               struct dbg_array curr_input_regs_arr = {
+                       &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset],
+                       split_data_size};
+
+               switch (split_type_id) {
+               case SPLIT_TYPE_NONE:
+               case SPLIT_TYPE_VF:
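+                       /* A split_id of -1 omits the "id" header param */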
+                       offset += qed_grc_dump_split_data(p_hwfn,
+                                                         p_ptt,
+                                                         curr_input_regs_arr,
+                                                         dump_buf + offset,
+                                                         dump,
+                                                         block_enable,
+                                                         "eng",
+                                                         (u32)(-1),
+                                                         param_name,
+                                                         param_val);
+                       break;
+               case SPLIT_TYPE_PORT:
+                       for (port_id = 0;
+                            port_id <
+                            s_chip_defs[dev_data->chip_id].
+                            per_platform[dev_data->platform_id].num_ports;
+                            port_id++) {
+                               if (dump)
+                                       qed_port_pretend(p_hwfn, p_ptt,
+                                                        port_id);
+                               offset +=
+                                   qed_grc_dump_split_data(p_hwfn, p_ptt,
+                                                           curr_input_regs_arr,
+                                                           dump_buf + offset,
+                                                           dump, block_enable,
+                                                           "port", port_id,
+                                                           param_name,
+                                                           param_val);
+                       }
+                       break;
+               case SPLIT_TYPE_PF:
+               case SPLIT_TYPE_PORT_PF:
+                       for (pf_id = 0;
+                            pf_id <
+                            s_chip_defs[dev_data->chip_id].
+                            per_platform[dev_data->platform_id].num_pfs;
+                            pf_id++) {
+                               if (dump)
+                                       qed_fid_pretend(p_hwfn, p_ptt, pf_id);
+                               offset += qed_grc_dump_split_data(p_hwfn,
+                                                       p_ptt,
+                                                       curr_input_regs_arr,
+                                                       dump_buf + offset,
+                                                       dump, block_enable,
+                                                       "pf", pf_id, param_name,
+                                                       param_val);
+                       }
+                       break;
+               default:
+                       break;
+               }
+
+               input_offset += split_data_size;
+       }
+
+       /* Pretend to original PF */
+       if (dump)
+               qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
+       return offset;
+}
+
+/* Dump reset registers. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt,
+                                  u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 i, offset = 0, num_regs = 0;
+
+       /* Calculate header size */
+       offset += qed_grc_dump_regs_hdr(dump_buf,
+                                       false, 0, "eng", -1, NULL, NULL);
+
+       /* Write reset registers */
+       for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
+               if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
+                       offset += qed_grc_dump_reg_entry(p_hwfn,
+                                                        p_ptt,
+                                                        dump_buf + offset,
+                                                        dump,
+                                                        BYTES_TO_DWORDS
+                                                        (s_reset_regs_defs
+                                                         [i].addr), 1);
+                       num_regs++;
+               }
+       }
+
+       /* Write header */
+       if (dump)
+               qed_grc_dump_regs_hdr(dump_buf,
+                                     true, num_regs, "eng", -1, NULL, NULL);
+       return offset;
+}
+
+/* Dump registers that are modified during GRC Dump and therefore must be dumped
+ * first. Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 offset = 0, num_reg_entries = 0, block_id;
+       u8 storm_id, reg_idx, num_attn_regs;
+
+       /* Calculate header size */
+       offset += qed_grc_dump_regs_hdr(dump_buf,
+                                       false, 0, "eng", -1, NULL, NULL);
+
+       /* Write parity registers */
+       for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+               const struct dbg_attn_reg *attn_reg_arr;
+
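+               /* Blocks in reset are skipped only when actually dumping */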
+               if (dev_data->block_in_reset[block_id] && dump)
+                       continue;
+
+               attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
+                                                      ATTN_TYPE_PARITY,
+                                                      &num_attn_regs);
+               for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
+                       const struct dbg_attn_reg *reg_data =
+                               &attn_reg_arr[reg_idx];
+                       u16 modes_buf_offset;
+                       bool eval_mode;
+
+                       /* Check mode */
+                       eval_mode = GET_FIELD(reg_data->mode.data,
+                                             DBG_MODE_HDR_EVAL_MODE) > 0;
+                       modes_buf_offset =
+                               GET_FIELD(reg_data->mode.data,
+                                         DBG_MODE_HDR_MODES_BUF_OFFSET);
+                       if (!eval_mode ||
+                           qed_is_mode_match(p_hwfn, &modes_buf_offset)) {
+                               /* Mode match - read and dump registers */
+                               offset += qed_grc_dump_reg_entry(p_hwfn,
+                                                       p_ptt,
+                                                       dump_buf + offset,
+                                                       dump,
+                                                       reg_data->mask_address,
+                                                       1);
+                               offset += qed_grc_dump_reg_entry(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset,
+                                               dump,
+                                               GET_FIELD(reg_data->data,
+                                                   DBG_ATTN_REG_STS_ADDRESS),
+                                               1);
+                               num_reg_entries += 2;
+                       }
+               }
+       }
+
+       /* Write storm stall status registers */
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] &&
+                   dump)
+                       continue;
+
+               offset += qed_grc_dump_reg_entry(p_hwfn,
+                                       p_ptt,
+                                       dump_buf + offset,
+                                       dump,
+                                       BYTES_TO_DWORDS(s_storm_defs[storm_id].
+                                                       sem_fast_mem_addr +
+                                                       SEM_FAST_REG_STALLED),
+                                       1);
+               num_reg_entries++;
+       }
+
+       /* Write header */
+       if (dump)
+               qed_grc_dump_regs_hdr(dump_buf,
+                                     true,
+                                     num_reg_entries, "eng", -1, NULL, NULL);
+       return offset;
+}
+
+/* Dumps a GRC memory header (section and params).
+ * The following parameters are dumped:
+ * name - name is dumped only if it's not NULL.
+ * addr - byte_addr is dumped only if name is NULL.
+ * len - dword_len is always dumped.
+ * width - bit_width is dumped if it's not zero.
+ * packed - packed=1 is dumped only if packed is true.
+ * mem_group - mem_group is always dumped.
+ * is_storm - true only if the memory is related to a Storm.
+ * storm_letter - storm letter (valid only if is_storm is true).
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
+                               u32 *dump_buf,
+                               bool dump,
+                               const char *name,
+                               u32 byte_addr,
+                               u32 dword_len,
+                               u32 bit_width,
+                               bool packed,
+                               const char *mem_group,
+                               bool is_storm, char storm_letter)
+{
+       u8 num_params = 3;
+       u32 offset = 0;
+       char buf[64];
+
+       if (!dword_len)
+               DP_NOTICE(p_hwfn,
+                         "Unexpected GRC Dump error: dumped memory size must be non-zero\n");
+       if (bit_width)
+               num_params++;
+       if (packed)
+               num_params++;
+
+       /* Dump section header */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "grc_mem", num_params);
+       if (name) {
+               /* Dump name */
+               if (is_storm) {
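+                       /* The Storm letter replaces the '?' placeholder */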
+                       strcpy(buf, "?STORM_");
+                       buf[0] = storm_letter;
+                       strcpy(buf + strlen(buf), name);
+               } else {
+                       strcpy(buf, name);
+               }
+
+               offset += qed_dump_str_param(dump_buf + offset,
+                                            dump, "name", buf);
+               if (dump)
+                       DP_VERBOSE(p_hwfn,
+                                  QED_MSG_DEBUG,
+                                  "Dumping %d registers from %s...\n",
+                                  dword_len, buf);
+       } else {
+               /* Dump address */
+               offset += qed_dump_num_param(dump_buf + offset,
+                                            dump, "addr", byte_addr);
+               if (dump && dword_len > 64)
+                       DP_VERBOSE(p_hwfn,
+                                  QED_MSG_DEBUG,
+                                  "Dumping %d registers from address 0x%x...\n",
+                                  dword_len, byte_addr);
+       }
+
+       /* Dump len */
+       offset += qed_dump_num_param(dump_buf + offset, dump, "len", dword_len);
+
+       /* Dump bit width */
+       if (bit_width)
+               offset += qed_dump_num_param(dump_buf + offset,
+                                            dump, "width", bit_width);
+
+       /* Dump packed */
+       if (packed)
+               offset += qed_dump_num_param(dump_buf + offset,
+                                            dump, "packed", 1);
+
+       /* Dump reg type */
+       if (is_storm) {
+               strcpy(buf, "?STORM_");
+               buf[0] = storm_letter;
+               strcpy(buf + strlen(buf), mem_group);
+       } else {
+               strcpy(buf, mem_group);
+       }
+
+       offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf);
+       return offset;
+}
+
+/* Dumps a single GRC memory. If name is NULL, the memory is dumped by address.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt,
+                           u32 *dump_buf,
+                           bool dump,
+                           const char *name,
+                           u32 byte_addr,
+                           u32 dword_len,
+                           u32 bit_width,
+                           bool packed,
+                           const char *mem_group,
+                           bool is_storm, char storm_letter)
+{
+       u32 offset = 0;
+
+       offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                      dump_buf + offset,
+                                      dump,
+                                      name,
+                                      byte_addr,
+                                      dword_len,
+                                      bit_width,
+                                      packed,
+                                      mem_group, is_storm, storm_letter);
+       if (dump) {
+               u32 i;
+
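+               /* Read the memory contents dword by dword */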
+               for (i = 0; i < dword_len;
+                    i++, byte_addr += BYTES_IN_DWORD, offset++)
+                       *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
+       } else {
+               offset += dword_len;
+       }
+
+       return offset;
+}
+
+/* Dumps GRC memories entries. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt,
+                                   struct dbg_array input_mems_arr,
+                                   u32 *dump_buf, bool dump)
+{
+       u32 i, offset = 0, input_offset = 0;
+       bool mode_match = true;
+
+       while (input_offset < input_mems_arr.size_in_dwords) {
+               const struct dbg_dump_cond_hdr *cond_hdr;
+               u32 num_entries;
+               bool eval_mode;
+
+               cond_hdr = (const struct dbg_dump_cond_hdr *)
+                          &input_mems_arr.ptr[input_offset++];
+               eval_mode = GET_FIELD(cond_hdr->mode.data,
+                                     DBG_MODE_HDR_EVAL_MODE) > 0;
+
+               /* Check required mode */
+               if (eval_mode) {
+                       u16 modes_buf_offset =
+                               GET_FIELD(cond_hdr->mode.data,
+                                         DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+                       mode_match = qed_is_mode_match(p_hwfn,
+                                                      &modes_buf_offset);
+               }
+
+               if (!mode_match) {
+                       input_offset += cond_hdr->data_size;
+                       continue;
+               }
+
+               num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS;
+               for (i = 0; i < num_entries;
+                    i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) {
+                       const struct dbg_dump_mem *mem =
+                               (const struct dbg_dump_mem *)
+                               &input_mems_arr.ptr[input_offset];
+                       u8 mem_group_id;
+
+                       mem_group_id = GET_FIELD(mem->dword0,
+                                                DBG_DUMP_MEM_MEM_GROUP_ID);
+                       if (mem_group_id >= MEM_GROUPS_NUM) {
+                               DP_NOTICE(p_hwfn, "Invalid mem_group_id\n");
+                               return 0;
+                       }
+
+                       if (qed_grc_is_mem_included(p_hwfn,
+                                       (enum block_id)cond_hdr->block_id,
+                                       mem_group_id)) {
+                               u32 mem_byte_addr =
+                                       DWORDS_TO_BYTES(GET_FIELD(mem->dword0,
+                                                       DBG_DUMP_MEM_ADDRESS));
+                               u32 mem_len = GET_FIELD(mem->dword1,
+                                                       DBG_DUMP_MEM_LENGTH);
+                               char storm_letter = 'a';
+                               bool is_storm = false;
+
+                               /* Update memory length for CCFC/TCFC memories
+                                * according to number of LCIDs/LTIDs.
+                                */
+                               if (mem_group_id == MEM_GROUP_CONN_CFC_MEM)
+                                       mem_len = qed_grc_get_param(p_hwfn,
+                                                       DBG_GRC_PARAM_NUM_LCIDS)
+                                                       * (mem_len / MAX_LCIDS);
+                               else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+                                       mem_len = qed_grc_get_param(p_hwfn,
+                                                       DBG_GRC_PARAM_NUM_LTIDS)
+                                                       * (mem_len / MAX_LTIDS);
+
+                               /* If memory is associated with Storm, update
+                                * Storm details.
+                                */
+                               if (s_block_defs[cond_hdr->block_id]->
+                                                       associated_to_storm) {
+                                       is_storm = true;
+                                       storm_letter =
+                                               s_storm_defs[s_block_defs[
+                                               cond_hdr->block_id]->
+                                               storm_id].letter;
+                               }
+
+                               /* Dump memory */
+                               offset += qed_grc_dump_mem(p_hwfn, p_ptt,
+                                               dump_buf + offset, dump, NULL,
+                                               mem_byte_addr, mem_len, 0,
+                                               false,
+                                               s_mem_group_names[mem_group_id],
+                                               is_storm, storm_letter);
+                       }
+               }
+       }
+
+       return offset;
+}
+
+/* Dumps GRC memories according to the input array dump_mem.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt,
+                                u32 *dump_buf, bool dump)
+{
+       u32 offset = 0, input_offset = 0;
+
+       while (input_offset <
+              s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) {
+               const struct dbg_dump_split_hdr *split_hdr =
+                       (const struct dbg_dump_split_hdr *)
+                       &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++];
+               u8 split_type_id = GET_FIELD(split_hdr->hdr,
+                                            DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+               u32 split_data_size = GET_FIELD(split_hdr->hdr,
+                                               DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+               struct dbg_array curr_input_mems_arr = {
+                       &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset],
+                       split_data_size};
+
+               switch (split_type_id) {
+               case SPLIT_TYPE_NONE:
+                       offset += qed_grc_dump_mem_entries(p_hwfn,
+                                                          p_ptt,
+                                                          curr_input_mems_arr,
+                                                          dump_buf + offset,
+                                                          dump);
+                       break;
+               default:
+                       DP_NOTICE(p_hwfn,
+                                 "Dumping split memories is currently not supported\n");
+                       break;
+               }
+
+               input_offset += split_data_size;
+       }
+
+       return offset;
+}
+
+/* Dumps GRC context data for the specified Storm.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt,
+                                u32 *dump_buf,
+                                bool dump,
+                                const char *name,
+                                u32 num_lids,
+                                u32 lid_size,
+                                u32 rd_reg_addr,
+                                u8 storm_id)
+{
+       u32 i, lid, total_size;
+       u32 offset = 0;
+
+       if (!lid_size)
+               return 0;
+       lid_size *= BYTES_IN_DWORD;
+       total_size = num_lids * lid_size;
+       offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                      dump_buf + offset,
+                                      dump,
+                                      name,
+                                      0,
+                                      total_size,
+                                      lid_size * 32,
+                                      false,
+                                      name,
+                                      true, s_storm_defs[storm_id].letter);
+
+       /* Dump context data */
+       if (dump) {
+               for (lid = 0; lid < num_lids; lid++) {
+                       for (i = 0; i < lid_size; i++, offset++) {
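+                               /* Select the LID, then read one dword back */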
+                               qed_wr(p_hwfn,
+                                      p_ptt,
+                                      s_storm_defs[storm_id].cm_ctx_wr_addr,
+                                      BIT(9) | lid);
+                               *(dump_buf + offset) = qed_rd(p_hwfn,
+                                                             p_ptt,
+                                                             rd_reg_addr);
+                       }
+               }
+       } else {
+               offset += total_size;
+       }
+
+       return offset;
+}
+
+/* Dumps GRC contexts. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       u32 offset = 0;
+       u8 storm_id;
+
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               if (!qed_grc_is_storm_included(p_hwfn,
+                                              (enum dbg_storms)storm_id))
+                       continue;
+
+               /* Dump Conn AG context size */
+               offset +=
+                       qed_grc_dump_ctx_data(p_hwfn,
+                                             p_ptt,
+                                             dump_buf + offset,
+                                             dump,
+                                             "CONN_AG_CTX",
+                                             qed_grc_get_param(p_hwfn,
+                                                   DBG_GRC_PARAM_NUM_LCIDS),
+                                             s_storm_defs[storm_id].
+                                                   cm_conn_ag_ctx_lid_size,
+                                             s_storm_defs[storm_id].
+                                                   cm_conn_ag_ctx_rd_addr,
+                                             storm_id);
+
+               /* Dump Conn ST context size */
+               offset +=
+                       qed_grc_dump_ctx_data(p_hwfn,
+                                             p_ptt,
+                                             dump_buf + offset,
+                                             dump,
+                                             "CONN_ST_CTX",
+                                             qed_grc_get_param(p_hwfn,
+                                                   DBG_GRC_PARAM_NUM_LCIDS),
+                                             s_storm_defs[storm_id].
+                                                   cm_conn_st_ctx_lid_size,
+                                             s_storm_defs[storm_id].
+                                                   cm_conn_st_ctx_rd_addr,
+                                             storm_id);
+
+               /* Dump Task AG context size */
+               offset +=
+                       qed_grc_dump_ctx_data(p_hwfn,
+                                             p_ptt,
+                                             dump_buf + offset,
+                                             dump,
+                                             "TASK_AG_CTX",
+                                             qed_grc_get_param(p_hwfn,
+                                                   DBG_GRC_PARAM_NUM_LTIDS),
+                                             s_storm_defs[storm_id].
+                                                   cm_task_ag_ctx_lid_size,
+                                             s_storm_defs[storm_id].
+                                                   cm_task_ag_ctx_rd_addr,
+                                             storm_id);
+
+               /* Dump Task ST context size */
+               offset +=
+                       qed_grc_dump_ctx_data(p_hwfn,
+                                             p_ptt,
+                                             dump_buf + offset,
+                                             dump,
+                                             "TASK_ST_CTX",
+                                             qed_grc_get_param(p_hwfn,
+                                                   DBG_GRC_PARAM_NUM_LTIDS),
+                                             s_storm_defs[storm_id].
+                                                   cm_task_st_ctx_lid_size,
+                                             s_storm_defs[storm_id].
+                                                   cm_task_st_ctx_rd_addr,
+                                             storm_id);
+       }
+
+       return offset;
+}
+
+/* Dumps GRC IORs data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
+                            struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       char buf[10] = "IOR_SET_?";
+       u8 storm_id, set_id;
+       u32 offset = 0;
+
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               if (qed_grc_is_storm_included(p_hwfn,
+                                             (enum dbg_storms)storm_id)) {
+                       for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
+                               u32 addr =
+                                   s_storm_defs[storm_id].sem_fast_mem_addr +
+                                   SEM_FAST_REG_STORM_REG_FILE +
+                                   DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id));
+
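+                               /* Patch the set index into the IOR name */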
+                               buf[strlen(buf) - 1] = '0' + set_id;
+                               offset += qed_grc_dump_mem(p_hwfn,
+                                                          p_ptt,
+                                                          dump_buf + offset,
+                                                          dump,
+                                                          buf,
+                                                          addr,
+                                                          IORS_PER_SET,
+                                                          32,
+                                                          false,
+                                                          "ior",
+                                                          true,
+                                                          s_storm_defs
+                                                          [storm_id].letter);
+                       }
+               }
+       }
+
+       return offset;
+}
+
+/* Dump VFC CAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn,
+                               struct qed_ptt *p_ptt,
+                               u32 *dump_buf, bool dump, u8 storm_id)
+{
+       u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS;
+       u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 };
+       u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 };
+       u32 offset = 0;
+       u32 row, i;
+
+       offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                      dump_buf + offset,
+                                      dump,
+                                      "vfc_cam",
+                                      0,
+                                      total_size,
+                                      256,
+                                      false,
+                                      "vfc_cam",
+                                      true, s_storm_defs[storm_id].letter);
+       if (dump) {
+               /* Prepare CAM address */
+               SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD);
+               for (row = 0; row < VFC_CAM_NUM_ROWS;
+                    row++, offset += VFC_CAM_RESP_DWORDS) {
+                       /* Write VFC CAM command */
+                       SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row);
+                       ARR_REG_WR(p_hwfn,
+                                  p_ptt,
+                                  s_storm_defs[storm_id].sem_fast_mem_addr +
+                                  SEM_FAST_REG_VFC_DATA_WR,
+                                  cam_cmd, VFC_CAM_CMD_DWORDS);
+
+                       /* Write VFC CAM address */
+                       ARR_REG_WR(p_hwfn,
+                                  p_ptt,
+                                  s_storm_defs[storm_id].sem_fast_mem_addr +
+                                  SEM_FAST_REG_VFC_ADDR,
+                                  cam_addr, VFC_CAM_ADDR_DWORDS);
+
+                       /* Read VFC CAM read response */
+                       ARR_REG_RD(p_hwfn,
+                                  p_ptt,
+                                  s_storm_defs[storm_id].sem_fast_mem_addr +
+                                  SEM_FAST_REG_VFC_DATA_RD,
+                                  dump_buf + offset, VFC_CAM_RESP_DWORDS);
+               }
+       } else {
+               offset += total_size;
+       }
+
+       return offset;
+}
+
+/* Dump VFC RAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
+                               struct qed_ptt *p_ptt,
+                               u32 *dump_buf,
+                               bool dump,
+                               u8 storm_id, struct vfc_ram_defs *ram_defs)
+{
+       u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS;
+       u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 };
+       u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 };
+       u32 offset = 0;
+       u32 row, i;
+
+       offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                      dump_buf + offset,
+                                      dump,
+                                      ram_defs->mem_name,
+                                      0,
+                                      total_size,
+                                      256,
+                                      false,
+                                      ram_defs->type_name,
+                                      true, s_storm_defs[storm_id].letter);
+
+       /* Prepare RAM address */
+       SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD);
+
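+       /* When not dumping, only account for the dump size */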
+       if (!dump)
+               return offset + total_size;
+
+       for (row = ram_defs->base_row;
+            row < ram_defs->base_row + ram_defs->num_rows;
+            row++, offset += VFC_RAM_RESP_DWORDS) {
+               /* Write VFC RAM command */
+               ARR_REG_WR(p_hwfn,
+                          p_ptt,
+                          s_storm_defs[storm_id].sem_fast_mem_addr +
+                          SEM_FAST_REG_VFC_DATA_WR,
+                          ram_cmd, VFC_RAM_CMD_DWORDS);
+
+               /* Write VFC RAM address */
+               SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row);
+               ARR_REG_WR(p_hwfn,
+                          p_ptt,
+                          s_storm_defs[storm_id].sem_fast_mem_addr +
+                          SEM_FAST_REG_VFC_ADDR,
+                          ram_addr, VFC_RAM_ADDR_DWORDS);
+
+               /* Read VFC RAM read response */
+               ARR_REG_RD(p_hwfn,
+                          p_ptt,
+                          s_storm_defs[storm_id].sem_fast_mem_addr +
+                          SEM_FAST_REG_VFC_DATA_RD,
+                          dump_buf + offset, VFC_RAM_RESP_DWORDS);
+       }
+
+       return offset;
+}
+
+/* Dumps GRC VFC data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u8 storm_id, i;
+       u32 offset = 0;
+
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               if (qed_grc_is_storm_included(p_hwfn,
+                                             (enum dbg_storms)storm_id) &&
+                   s_storm_defs[storm_id].has_vfc &&
+                   (storm_id != DBG_PSTORM_ID ||
+                    dev_data->platform_id == PLATFORM_ASIC)) {
+                       /* Read CAM */
+                       offset += qed_grc_dump_vfc_cam(p_hwfn,
+                                                      p_ptt,
+                                                      dump_buf + offset,
+                                                      dump, storm_id);
+
+                       /* Read RAM */
+                       for (i = 0; i < NUM_VFC_RAM_TYPES; i++)
+                               offset +=
+                                   qed_grc_dump_vfc_ram(p_hwfn, p_ptt,
+                                                        dump_buf + offset,
+                                                        dump, storm_id,
+                                                        &s_vfc_ram_defs[i]);
+               }
+       }
+
+       return offset;
+}
+
+/* Dumps GRC RSS data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 offset = 0;
+       u8 rss_mem_id;
+
+       for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) {
+               struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id];
+               u32 num_entries = rss_defs->num_entries[dev_data->chip_id];
+               u32 entry_width = rss_defs->entry_width[dev_data->chip_id];
+               u32 total_size = (num_entries * entry_width) / 32;
+               bool packed = (entry_width == 16);
+               u32 addr = rss_defs->addr;
+               u32 i, j;
+
+               offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                              dump_buf + offset,
+                                              dump,
+                                              rss_defs->mem_name,
+                                              addr,
+                                              total_size,
+                                              entry_width,
+                                              packed,
+                                              rss_defs->type_name, false, 0);
+
+               if (!dump) {
+                       offset += total_size;
+                       continue;
+               }
+
+               /* Dump RSS data */
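+               /* Each RSS RAM address holds BYTES_IN_DWORD dwords, read from
+                * consecutive data registers once the address is written.
+                */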
+               for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) {
+                       qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr);
+                       for (j = 0; j < BYTES_IN_DWORD; j++, offset++)
+                               *(dump_buf + offset) =
+                                       qed_rd(p_hwfn, p_ptt,
+                                              RSS_REG_RSS_RAM_DATA +
+                                              DWORDS_TO_BYTES(j));
+               }
+       }
+
+       return offset;
+}
+
+/* Dumps GRC Big RAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
+                               struct qed_ptt *p_ptt,
+                               u32 *dump_buf, bool dump, u8 big_ram_id)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       char mem_name[12] = "???_BIG_RAM";
+       char type_name[8] = "???_RAM";
+       u32 ram_size, total_blocks;
+       u32 offset = 0, i, j;
+
+       total_blocks =
+               s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id];
+       ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS;
+
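+       /* Replace the "???" prefix with the instance name. Copying exactly
+        * strlen(instance_name) characters (assumed to be 3) leaves the
+        * "_RAM" / "_BIG_RAM" suffix and the terminating NUL intact.
+        */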
+       strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name,
+               strlen(s_big_ram_defs[big_ram_id].instance_name));
+       strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name,
+               strlen(s_big_ram_defs[big_ram_id].instance_name));
+
+       /* Dump memory header */
+       offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                      dump_buf + offset,
+                                      dump,
+                                      mem_name,
+                                      0,
+                                      ram_size,
+                                      BIG_RAM_BLOCK_SIZE_BYTES * 8,
+                                      false, type_name, false, 0);
+
+       if (!dump)
+               return offset + ram_size;
+
+       /* Read and dump Big RAM data */
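+       /* Each address selects two consecutive blocks, so the loop performs
+        * total_blocks / 2 iterations of 2 * BIG_RAM_BLOCK_SIZE_DWORDS reads
+        * each.
+        */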
+       for (i = 0; i < total_blocks / 2; i++) {
+               qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr,
+                      i);
+               for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++)
+                       *(dump_buf + offset) =
+                           qed_rd(p_hwfn, p_ptt,
+                                  s_big_ram_defs[big_ram_id].data_reg_addr +
+                                  DWORDS_TO_BYTES(j));
+       }
+
+       return offset;
+}
+
+static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       bool block_enable[MAX_BLOCK_ID] = { 0 };
+       bool halted = false;
+       u32 offset = 0;
+
+       /* Halt MCP so its scratchpad and register file can be sampled
+        * consistently.
+        */
+       if (dump) {
+               halted = !qed_mcp_halt(p_hwfn, p_ptt);
+               if (!halted)
+                       DP_NOTICE(p_hwfn, "MCP halt failed!\n");
+       }
+
+       /* Dump MCP scratchpad */
+       offset += qed_grc_dump_mem(p_hwfn,
+                                  p_ptt,
+                                  dump_buf + offset,
+                                  dump,
+                                  NULL,
+                                  MCP_REG_SCRATCH,
+                                  MCP_REG_SCRATCH_SIZE,
+                                  0, false, "MCP", false, 0);
+
+       /* Dump MCP cpu_reg_file */
+       offset += qed_grc_dump_mem(p_hwfn,
+                                  p_ptt,
+                                  dump_buf + offset,
+                                  dump,
+                                  NULL,
+                                  MCP_REG_CPU_REG_FILE,
+                                  MCP_REG_CPU_REG_FILE_SIZE,
+                                  0, false, "MCP", false, 0);
+
+       /* Dump MCP registers */
+       block_enable[BLOCK_MCP] = true;
+       offset += qed_grc_dump_registers(p_hwfn,
+                                        p_ptt,
+                                        dump_buf + offset,
+                                        dump, block_enable, "block", "MCP");
+
+       /* Dump required non-MCP registers */
+       offset += qed_grc_dump_regs_hdr(dump_buf + offset,
+                                       dump, 1, "eng", -1, "block", "MCP");
+       offset += qed_grc_dump_reg_entry(p_hwfn,
+                                        p_ptt,
+                                        dump_buf + offset,
+                                        dump,
+                                        BYTES_TO_DWORDS
+                                        (MISC_REG_SHARED_MEM_ADDR), 1);
+
+       /* Release MCP */
+       if (halted && qed_mcp_resume(p_hwfn, p_ptt))
+               DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
+       return offset;
+}
+
+/* Dumps the tbus indirect memory for all PHYs. */
+static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       u32 offset = 0, tbus_lo_offset, tbus_hi_offset;
+       char mem_name[32];
+       u8 phy_id;
+
+       for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) {
+               struct phy_defs *phy_defs = &s_phy_defs[phy_id];
+               int printed_chars;
+
+               printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s",
+                                        phy_defs->phy_name);
+               if (printed_chars < 0 || printed_chars >= sizeof(mem_name))
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpected debug error: invalid PHY memory name\n");
+               offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                              dump_buf + offset,
+                                              dump,
+                                              mem_name,
+                                              0,
+                                              PHY_DUMP_SIZE_DWORDS,
+                                              16, true, mem_name, false, 0);
+               if (dump) {
+                       u32 addr_lo_addr = phy_defs->base_addr +
+                                          phy_defs->tbus_addr_lo_addr;
+                       u32 addr_hi_addr = phy_defs->base_addr +
+                                          phy_defs->tbus_addr_hi_addr;
+                       u32 data_lo_addr = phy_defs->base_addr +
+                                          phy_defs->tbus_data_lo_addr;
+                       u32 data_hi_addr = phy_defs->base_addr +
+                                          phy_defs->tbus_data_hi_addr;
+                       u8 *bytes_buf = (u8 *)(dump_buf + offset);
+
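+                       /* The tbus address is split into a high byte (written
+                        * to addr_hi) and a low byte (written to addr_lo);
+                        * each address yields two data bytes, read from the
+                        * data_lo and data_hi registers.
+                        */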
+                       for (tbus_hi_offset = 0;
+                            tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8);
+                            tbus_hi_offset++) {
+                               qed_wr(p_hwfn,
+                                      p_ptt, addr_hi_addr, tbus_hi_offset);
+                               for (tbus_lo_offset = 0; tbus_lo_offset < 256;
+                                    tbus_lo_offset++) {
+                                       qed_wr(p_hwfn,
+                                              p_ptt,
+                                              addr_lo_addr, tbus_lo_offset);
+                                       *(bytes_buf++) =
+                                               (u8)qed_rd(p_hwfn, p_ptt,
+                                                          data_lo_addr);
+                                       *(bytes_buf++) =
+                                               (u8)qed_rd(p_hwfn, p_ptt,
+                                                          data_hi_addr);
+                               }
+                       }
+               }
+
+               offset += PHY_DUMP_SIZE_DWORDS;
+       }
+
+       return offset;
+}
+
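+/* Configures the debug line of the specified block: selects the line and sets
+ * its cycle enable, right shift, force valid and force frame values.
+ */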
+static void qed_config_dbg_line(struct qed_hwfn *p_hwfn,
+                               struct qed_ptt *p_ptt,
+                               enum block_id block_id,
+                               u8 line_id,
+                               u8 cycle_en,
+                               u8 right_shift, u8 force_valid, u8 force_frame)
+{
+       struct block_defs *p_block_defs = s_block_defs[block_id];
+
+       qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id);
+       qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en);
+       qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, right_shift);
+       qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid);
+       qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame);
+}
+
+/* Dumps Static Debug data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    u32 *dump_buf, bool dump)
+{
+       u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS;
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 offset = 0, block_id, line_id, addr, i;
+       struct block_defs *p_block_defs;
+
+       if (dump) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_DEBUG, "Dumping static debug data...\n");
+
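+               /* Route the debug bus to the internal buffer in full mode.
+                * DBG_BUS_FRAME_MODE_8HW_0ST presumably selects frames of
+                * 8 HW dwords and no Storm dwords.
+                */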
+               /* Disable all blocks debug output */
+               for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+                       p_block_defs = s_block_defs[block_id];
+
+                       if (p_block_defs->has_dbg_bus[dev_data->chip_id])
+                               qed_wr(p_hwfn, p_ptt,
+                                      p_block_defs->dbg_cycle_enable_addr, 0);
+               }
+
+               qed_bus_reset_dbg_block(p_hwfn, p_ptt);
+               qed_bus_set_framing_mode(p_hwfn,
+                                        p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST);
+               qed_wr(p_hwfn,
+                      p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF);
+               qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1);
+               qed_bus_enable_dbg_block(p_hwfn, p_ptt, true);
+       }
+
+       /* Dump all static debug lines for each relevant block */
+       for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+               p_block_defs = s_block_defs[block_id];
+
+               if (!p_block_defs->has_dbg_bus[dev_data->chip_id])
+                       continue;
+
+               /* Dump static section params */
+               offset += qed_grc_dump_mem_hdr(p_hwfn,
+                                              dump_buf + offset,
+                                              dump,
+                                              p_block_defs->name, 0,
+                                              block_dwords, 32, false,
+                                              "STATIC", false, 0);
+
+               if (dump && !dev_data->block_in_reset[block_id]) {
+                       u8 dbg_client_id =
+                               p_block_defs->dbg_client_id[dev_data->chip_id];
+
+                       /* Enable block's client */
+                       qed_bus_enable_clients(p_hwfn, p_ptt,
+                                              BIT(dbg_client_id));
+
+                       for (line_id = 0; line_id < NUM_DBG_BUS_LINES;
+                            line_id++) {
+                               /* Configure debug line ID */
+                               qed_config_dbg_line(p_hwfn,
+                                                   p_ptt,
+                                                   (enum block_id)block_id,
+                                                   (u8)line_id,
+                                                   0xf, 0, 0, 0);
+
+                               /* Read debug line info */
+                               for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA;
+                                    i < STATIC_DEBUG_LINE_DWORDS;
+                                    i++, offset++, addr += BYTES_IN_DWORD)
+                                       dump_buf[offset] = qed_rd(p_hwfn, p_ptt,
+                                                                 addr);
+                       }
+
+                       /* Disable block's client and debug output */
+                       qed_bus_enable_clients(p_hwfn, p_ptt, 0);
+                       qed_wr(p_hwfn, p_ptt,
+                              p_block_defs->dbg_cycle_enable_addr, 0);
+               } else {
+                       /* All lines are invalid - dump zeros */
+                       if (dump)
+                               memset(dump_buf + offset, 0,
+                                      DWORDS_TO_BYTES(block_dwords));
+                       offset += block_dwords;
+               }
+       }
+
+       if (dump) {
+               qed_bus_enable_dbg_block(p_hwfn, p_ptt, false);
+               qed_bus_enable_clients(p_hwfn, p_ptt, 0);
+       }
+
+       return offset;
+}
+
+/* Performs GRC Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt,
+                                   u32 *dump_buf,
+                                   bool dump, u32 *num_dumped_dwords)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       bool parities_masked = false;
+       u8 i, port_mode = 0;
+       u32 offset = 0;
+
+       *num_dumped_dwords = 0;
+
+       /* Fill GRC parameters that were not set by the user with their default
+        * value.
+        */
+       qed_dbg_grc_set_params_default(p_hwfn);
+
+       /* Find port mode */
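+       /* MISC_REG_PORT_MODE holds 0, 1 or 2 for 1, 2 or 4 ports,
+        * respectively.
+        */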
+       if (dump) {
+               switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) {
+               case 0:
+                       port_mode = 1;
+                       break;
+               case 1:
+                       port_mode = 2;
+                       break;
+               case 2:
+                       port_mode = 4;
+                       break;
+               }
+       }
+
+       /* Update reset state */
+       if (dump)
+               qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 4);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "grc-dump");
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump,
+                                    "num-lcids",
+                                    qed_grc_get_param(p_hwfn,
+                                               DBG_GRC_PARAM_NUM_LCIDS));
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump,
+                                    "num-ltids",
+                                    qed_grc_get_param(p_hwfn,
+                                               DBG_GRC_PARAM_NUM_LTIDS));
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump, "num-ports", port_mode);
+
+       /* Dump reset registers (dumped before taking blocks out of reset) */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS))
+               offset += qed_grc_dump_reset_regs(p_hwfn,
+                                                 p_ptt,
+                                                 dump_buf + offset, dump);
+
+       /* Take all blocks out of reset (using reset registers) */
+       if (dump) {
+               qed_grc_unreset_blocks(p_hwfn, p_ptt);
+               qed_update_blocks_reset_state(p_hwfn, p_ptt);
+       }
+
+       /* Disable all parities using MFW command */
+       if (dump) {
+               parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1);
+               if (!parities_masked) {
+                       if (qed_grc_get_param(p_hwfn,
+                                             DBG_GRC_PARAM_PARITY_SAFE))
+                               return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY;
+                       else
+                               DP_NOTICE(p_hwfn,
+                                         "Failed to mask parities using MFW\n");
+               }
+       }
+
+       /* Dump modified registers (dumped before modifying them) */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS))
+               offset += qed_grc_dump_modified_regs(p_hwfn,
+                                                    p_ptt,
+                                                    dump_buf + offset, dump);
+
+       /* Stall storms */
+       if (dump &&
+           (qed_grc_is_included(p_hwfn,
+                                DBG_GRC_PARAM_DUMP_IOR) ||
+            qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)))
+               qed_grc_stall_storms(p_hwfn, p_ptt, true);
+
+       /* Dump all regs */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) {
+               /* Dump all blocks except MCP */
+               bool block_enable[MAX_BLOCK_ID];
+
+               for (i = 0; i < MAX_BLOCK_ID; i++)
+                       block_enable[i] = true;
+               block_enable[BLOCK_MCP] = false;
+               offset += qed_grc_dump_registers(p_hwfn,
+                                                p_ptt,
+                                                dump_buf + offset,
+                                                dump,
+                                                block_enable, NULL, NULL);
+       }
+
+       /* Dump memories */
+       offset += qed_grc_dump_memories(p_hwfn, p_ptt, dump_buf + offset, dump);
+
+       /* Dump MCP */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP))
+               offset += qed_grc_dump_mcp(p_hwfn,
+                                          p_ptt, dump_buf + offset, dump);
+
+       /* Dump context */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX))
+               offset += qed_grc_dump_ctx(p_hwfn,
+                                          p_ptt, dump_buf + offset, dump);
+
+       /* Dump RSS memories */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RSS))
+               offset += qed_grc_dump_rss(p_hwfn,
+                                          p_ptt, dump_buf + offset, dump);
+
+       /* Dump Big RAM */
+       for (i = 0; i < NUM_BIG_RAM_TYPES; i++)
+               if (qed_grc_is_included(p_hwfn, s_big_ram_defs[i].grc_param))
+                       offset += qed_grc_dump_big_ram(p_hwfn,
+                                                      p_ptt,
+                                                      dump_buf + offset,
+                                                      dump, i);
+
+       /* Dump IORs */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR))
+               offset += qed_grc_dump_iors(p_hwfn,
+                                           p_ptt, dump_buf + offset, dump);
+
+       /* Dump VFC */
+       if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC))
+               offset += qed_grc_dump_vfc(p_hwfn,
+                                          p_ptt, dump_buf + offset, dump);
+
+       /* Dump PHY tbus */
+       if (qed_grc_is_included(p_hwfn,
+                               DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id ==
+           CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC)
+               offset += qed_grc_dump_phy(p_hwfn,
+                                          p_ptt, dump_buf + offset, dump);
+
+       /* Dump static debug data */
+       if (qed_grc_is_included(p_hwfn,
+                               DBG_GRC_PARAM_DUMP_STATIC) &&
+           dev_data->bus.state == DBG_BUS_STATE_IDLE)
+               offset += qed_grc_dump_static_debug(p_hwfn,
+                                                   p_ptt,
+                                                   dump_buf + offset, dump);
+
+       /* Dump last section */
+       offset += qed_dump_last_section(dump_buf, offset, dump);
+       if (dump) {
+               /* Unstall storms */
+               if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL))
+                       qed_grc_stall_storms(p_hwfn, p_ptt, false);
+
+               /* Clear parity status */
+               qed_grc_clear_all_prty(p_hwfn, p_ptt);
+
+               /* Enable all parities using MFW command */
+               if (parities_masked)
+                       qed_mcp_mask_parities(p_hwfn, p_ptt, 0);
+       }
+
+       *num_dumped_dwords = offset;
+
+       return DBG_STATUS_OK;
+}
+
+/* Writes the specified failing Idle Check rule to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    u32 *dump_buf,
+                                    bool dump,
+                                    u16 rule_id,
+                                    const struct dbg_idle_chk_rule *rule,
+                                    u16 fail_entry_id, u32 *cond_reg_values)
+{
+       const union dbg_idle_chk_reg *regs =
+               &((const union dbg_idle_chk_reg *)
+                 s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr)[rule->reg_offset];
+       const struct dbg_idle_chk_cond_reg *cond_regs = &regs[0].cond_reg;
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       struct dbg_idle_chk_result_hdr *hdr =
+               (struct dbg_idle_chk_result_hdr *)dump_buf;
+       const struct dbg_idle_chk_info_reg *info_regs =
+               &regs[rule->num_cond_regs].info_reg;
+       u32 next_reg_offset = 0, i, offset = 0;
+       u8 reg_id;
+
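+       /* The dumped failure entry consists of a result header, followed by a
+        * header + values for each condition register, followed by a
+        * header + values for each valid info register.
+        */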
+       /* Dump rule data */
+       if (dump) {
+               memset(hdr, 0, sizeof(*hdr));
+               hdr->rule_id = rule_id;
+               hdr->mem_entry_id = fail_entry_id;
+               hdr->severity = rule->severity;
+               hdr->num_dumped_cond_regs = rule->num_cond_regs;
+       }
+
+       offset += IDLE_CHK_RESULT_HDR_DWORDS;
+
+       /* Dump condition register values */
+       for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) {
+               const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id];
+
+               /* Write register header */
+               if (dump) {
+                       struct dbg_idle_chk_result_reg_hdr *reg_hdr =
+                           (struct dbg_idle_chk_result_reg_hdr *)(dump_buf
+                                                                  + offset);
+                       offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+                       memset(reg_hdr, 0,
+                              sizeof(struct dbg_idle_chk_result_reg_hdr));
+                       reg_hdr->start_entry = reg->start_entry;
+                       reg_hdr->size = reg->entry_size;
+                       SET_FIELD(reg_hdr->data,
+                                 DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM,
+                                 reg->num_entries > 1 || reg->start_entry > 0
+                                 ? 1 : 0);
+                       SET_FIELD(reg_hdr->data,
+                                 DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id);
+
+                       /* Write register values */
+                       for (i = 0; i < reg_hdr->size;
+                            i++, next_reg_offset++, offset++)
+                               dump_buf[offset] =
+                                   cond_reg_values[next_reg_offset];
+               } else {
+                       offset += IDLE_CHK_RESULT_REG_HDR_DWORDS +
+                           reg->entry_size;
+               }
+       }
+
+       /* Dump info register values */
+       for (reg_id = 0; reg_id < rule->num_info_regs; reg_id++) {
+               const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id];
+               u32 block_id;
+
+               if (!dump) {
+                       offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size;
+                       continue;
+               }
+
+               /* Check if register's block is in reset */
+               block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID);
+               if (block_id >= MAX_BLOCK_ID) {
+                       DP_NOTICE(p_hwfn, "Invalid block_id\n");
+                       return 0;
+               }
+
+               if (!dev_data->block_in_reset[block_id]) {
+                       bool eval_mode = GET_FIELD(reg->mode.data,
+                                                  DBG_MODE_HDR_EVAL_MODE) > 0;
+                       bool mode_match = true;
+
+                       /* Check mode */
+                       if (eval_mode) {
+                               u16 modes_buf_offset =
+                                       GET_FIELD(reg->mode.data,
+                                               DBG_MODE_HDR_MODES_BUF_OFFSET);
+                               mode_match =
+                                       qed_is_mode_match(p_hwfn,
+                                                         &modes_buf_offset);
+                       }
+
+                       if (mode_match) {
+                               u32 grc_addr =
+                                       DWORDS_TO_BYTES(GET_FIELD(reg->data,
+                                               DBG_IDLE_CHK_INFO_REG_ADDRESS));
+
+                               /* Write register header */
+                               struct dbg_idle_chk_result_reg_hdr *reg_hdr =
+                                       (struct dbg_idle_chk_result_reg_hdr *)
+                                       (dump_buf + offset);
+
+                               offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+                               hdr->num_dumped_info_regs++;
+                               memset(reg_hdr, 0, sizeof(*reg_hdr));
+                               reg_hdr->size = reg->size;
+                               SET_FIELD(reg_hdr->data,
+                                       DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
+                                       rule->num_cond_regs + reg_id);
+
+                               /* Write register values */
+                               for (i = 0; i < reg->size;
+                                    i++, offset++, grc_addr += 4)
+                                       dump_buf[offset] =
+                                               qed_rd(p_hwfn, p_ptt, grc_addr);
+                       }
+               }
+       }
+
+       return offset;
+}
+
+/* Dumps idle check rule entries. Returns the dumped size in dwords. */
+static u32
+qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                              u32 *dump_buf, bool dump,
+                              const struct dbg_idle_chk_rule *input_rules,
+                              u32 num_input_rules, u32 *num_failing_rules)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE];
+       u32 i, j, offset = 0;
+       u16 entry_id;
+       u8 reg_id;
+
+       *num_failing_rules = 0;
+       for (i = 0; i < num_input_rules; i++) {
+               const struct dbg_idle_chk_cond_reg *cond_regs;
+               const struct dbg_idle_chk_rule *rule;
+               const union dbg_idle_chk_reg *regs;
+               u16 num_reg_entries = 1;
+               bool check_rule = true;
+               const u32 *imm_values;
+
+               rule = &input_rules[i];
+               regs = &((const union dbg_idle_chk_reg *)
+                        s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr)
+                       [rule->reg_offset];
+               cond_regs = &regs[0].cond_reg;
+               imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr
+                            [rule->imm_offset];
+
+               /* Check if all condition register blocks are out of reset, and
+                * find maximal number of entries (all condition registers that
+                * are memories must have the same size, which is > 1).
+                */
+               for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule;
+                    reg_id++) {
+                       u32 block_id = GET_FIELD(cond_regs[reg_id].data,
+                                               DBG_IDLE_CHK_COND_REG_BLOCK_ID);
+
+                       if (block_id >= MAX_BLOCK_ID) {
+                               DP_NOTICE(p_hwfn, "Invalid block_id\n");
+                               return 0;
+                       }
+
+                       check_rule = !dev_data->block_in_reset[block_id];
+                       if (cond_regs[reg_id].num_entries > num_reg_entries)
+                               num_reg_entries = cond_regs[reg_id].num_entries;
+               }
+
+               if (!check_rule && dump)
+                       continue;
+
+               /* Go over all register entries (number of entries is the same
+                * for all condition registers).
+                */
+               for (entry_id = 0; entry_id < num_reg_entries; entry_id++) {
+                       /* Read current entry of all condition registers */
+                       if (dump) {
+                               u32 next_reg_offset = 0;
+
+                               for (reg_id = 0;
+                                    reg_id < rule->num_cond_regs;
+                                    reg_id++) {
+                                       const struct dbg_idle_chk_cond_reg
+                                               *reg = &cond_regs[reg_id];
+
+                                       /* Find GRC address (if it's a memory,
+                                        * the address of the specific entry is
+                                        * calculated).
+                                        */
+                                       u32 grc_addr =
+                                          DWORDS_TO_BYTES(
+                                               GET_FIELD(reg->data,
+                                                   DBG_IDLE_CHK_COND_REG_ADDRESS));
+
+                                       if (reg->num_entries > 1 ||
+                                           reg->start_entry > 0) {
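+                                               /* Memory entries are laid
+                                                * out at a power-of-2 stride.
+                                                */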
+                                               u32 padded_entry_size =
+                                                       reg->entry_size > 1 ?
+                                                       roundup_pow_of_two
+                                                       (reg->entry_size) : 1;
+
+                                               grc_addr +=
+                                                       DWORDS_TO_BYTES(
+                                                               (reg->start_entry +
+                                                               entry_id)
+                                                               * padded_entry_size);
+                                       }
+
+                                       /* Read registers */
+                                       if (next_reg_offset + reg->entry_size >=
+                                           IDLE_CHK_MAX_ENTRIES_SIZE) {
+                                               DP_NOTICE(p_hwfn,
+                                                         "idle check registers entry is too large\n");
+                                               return 0;
+                                       }
+
+                                       for (j = 0; j < reg->entry_size;
+                                            j++, next_reg_offset++,
+                                            grc_addr += 4)
+                                            cond_reg_values[next_reg_offset] =
+                                               qed_rd(p_hwfn, p_ptt, grc_addr);
+                               }
+                       }
+
+                       /* Call rule's condition function - a return value of
+                        * true indicates failure.
+                        */
+                       if ((*cond_arr[rule->cond_id])(cond_reg_values,
+                                                      imm_values) || !dump) {
+                               offset +=
+                                       qed_idle_chk_dump_failure(p_hwfn,
+                                                       p_ptt,
+                                                       dump_buf + offset,
+                                                       dump,
+                                                       rule->rule_id,
+                                                       rule,
+                                                       entry_id,
+                                                       cond_reg_values);
+                               (*num_failing_rules)++;
+                               break;
+                       }
+               }
+       }
+
+       return offset;
+}
+
+/* Performs Idle Check Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn,
+                            struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       u32 offset = 0, input_offset = 0, num_failing_rules = 0;
+       u32 num_failing_rules_offset;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "idle-chk");
+
+       /* Dump idle check section header with a single parameter */
+       offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1);
+       num_failing_rules_offset = offset;
+       offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0);
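+       /* num_rules is dumped as 0 for now, and overwritten at the end once
+        * the actual number of failing rules is known.
+        */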
+       while (input_offset <
+              s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) {
+               const struct dbg_idle_chk_cond_hdr *cond_hdr =
+                       (const struct dbg_idle_chk_cond_hdr *)
+                       &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr
+                       [input_offset++];
+               bool eval_mode = GET_FIELD(cond_hdr->mode.data,
+                                          DBG_MODE_HDR_EVAL_MODE) > 0;
+               bool mode_match = true;
+
+               /* Check mode */
+               if (eval_mode) {
+                       u16 modes_buf_offset =
+                               GET_FIELD(cond_hdr->mode.data,
+                                         DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+                       mode_match = qed_is_mode_match(p_hwfn,
+                                                      &modes_buf_offset);
+               }
+
+               if (mode_match) {
+                       u32 curr_failing_rules;
+
+                       offset +=
+                           qed_idle_chk_dump_rule_entries(p_hwfn,
+                               p_ptt,
+                               dump_buf + offset,
+                               dump,
+                               (const struct dbg_idle_chk_rule *)
+                               &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].
+                               ptr[input_offset],
+                               cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS,
+                               &curr_failing_rules);
+                       num_failing_rules += curr_failing_rules;
+               }
+
+               input_offset += cond_hdr->data_size;
+       }
+
+       /* Overwrite num_rules parameter */
+       if (dump)
+               qed_dump_num_param(dump_buf + num_failing_rules_offset,
+                                  dump, "num_rules", num_failing_rules);
+
+       return offset;
+}
+
+/* Finds the meta data image in NVRAM. */
+static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
+                                           struct qed_ptt *p_ptt,
+                                           u32 image_type,
+                                           u32 *nvram_offset_bytes,
+                                           u32 *nvram_size_bytes)
+{
+       u32 ret_mcp_resp, ret_mcp_param, ret_txn_size;
+       struct mcp_file_att file_att;
+
+       /* Call NVRAM get file command */
+       if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT,
+                              image_type, &ret_mcp_resp, &ret_mcp_param,
+                              &ret_txn_size, (u32 *)&file_att) != 0)
+               return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
+
+       /* Check response */
+       if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
+               return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
+
+       /* Update return values */
+       *nvram_offset_bytes = file_att.nvm_start_addr;
+       *nvram_size_bytes = file_att.len;
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_DEBUG,
+                  "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n",
+                  image_type, *nvram_offset_bytes, *nvram_size_bytes);
+
+       /* Check alignment */
+       if (*nvram_size_bytes & 0x3)
+               return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE;
+       return DBG_STATUS_OK;
+}
+
+static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 nvram_offset_bytes,
+                                     u32 nvram_size_bytes, u32 *ret_buf)
+{
+       u32 ret_mcp_resp, ret_mcp_param, ret_read_size;
+       u32 bytes_to_copy, read_offset = 0;
+       s32 bytes_left = nvram_size_bytes;
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_DEBUG,
+                  "nvram_read: reading image of size %d bytes from NVRAM\n",
+                  nvram_size_bytes);
+       do {
+               bytes_to_copy = (bytes_left > MCP_DRV_NVM_BUF_LEN) ?
+                               MCP_DRV_NVM_BUF_LEN : bytes_left;
+
+               /* Call NVRAM read command */
+               if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+                                      DRV_MSG_CODE_NVM_READ_NVRAM,
+                                      (nvram_offset_bytes +
+                                       read_offset) |
+                                      (bytes_to_copy <<
+                                       DRV_MB_PARAM_NVM_LEN_SHIFT),
+                                      &ret_mcp_resp, &ret_mcp_param,
+                                      &ret_read_size,
+                                      (u32 *)((u8 *)ret_buf +
+                                              read_offset)) != 0)
+                       return DBG_STATUS_NVRAM_READ_FAILED;
+
+               /* Check response */
+               if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
+                       return DBG_STATUS_NVRAM_READ_FAILED;
+
+               /* Update read offset */
+               read_offset += ret_read_size;
+               bytes_left -= ret_read_size;
+       } while (bytes_left > 0);
+
+       return DBG_STATUS_OK;
+}
+
+/* Get info on the MCP Trace data in the scratchpad:
+ * - trace_data_grc_addr - the GRC address of the trace data
+ * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without
+ *     the header)
+ */
+static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *trace_data_grc_addr,
+                                                  u32 *trace_data_size_bytes)
+{
+       /* Read MCP trace section offsize structure from MCP scratchpad */
+       u32 spad_trace_offsize = qed_rd(p_hwfn,
+                                       p_ptt,
+                                       MCP_SPAD_TRACE_OFFSIZE_ADDR);
+       u32 signature;
+
+       /* Extract MCP trace section GRC address from offsize structure (within
+        * scratchpad).
+        */
+       *trace_data_grc_addr =
+               MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize);
+
+       /* Read signature from MCP trace section */
+       signature = qed_rd(p_hwfn, p_ptt,
+                          *trace_data_grc_addr +
+                          offsetof(struct mcp_trace, signature));
+       if (signature != MFW_TRACE_SIGNATURE)
+               return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+       /* Read trace size from MCP trace section */
+       *trace_data_size_bytes = qed_rd(p_hwfn,
+                                       p_ptt,
+                                       *trace_data_grc_addr +
+                                       offsetof(struct mcp_trace, size));
+       return DBG_STATUS_OK;
+}
+
+/* Reads MCP trace meta data image from NVRAM.
+ * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from
+ *     file)
+ * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP
+ *     Trace meta data starts (invalid when loaded from file)
+ * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data
+ */
+static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 trace_data_size_bytes,
+                                                  u32 *running_bundle_id,
+                                                  u32 *trace_meta_offset_bytes,
+                                                  u32 *trace_meta_size_bytes)
+{
+       /* Read MCP trace section offsize structure from MCP scratchpad */
+       u32 spad_trace_offsize = qed_rd(p_hwfn,
+                                       p_ptt,
+                                       MCP_SPAD_TRACE_OFFSIZE_ADDR);
+
+       /* Find running bundle ID */
+       u32 running_mfw_addr =
+               MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) +
+               QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes;
+       enum dbg_status status;
+       u32 nvram_image_type;
+
+       *running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr);
+       if (*running_bundle_id > 1)
+               return DBG_STATUS_INVALID_NVRAM_BUNDLE;
+
+       /* Find image in NVRAM */
+       nvram_image_type = (*running_bundle_id == DIR_ID_1) ?
+                          NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2;
+       status = qed_find_nvram_image(p_hwfn,
+                                     p_ptt,
+                                     nvram_image_type,
+                                     trace_meta_offset_bytes,
+                                     trace_meta_size_bytes);
+
+       return status;
+}
+
+/* Reads the MCP Trace data from the specified GRC address into the specified
+ * buffer.
+ */
+static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt,
+                                   u32 grc_addr, u32 size_in_dwords, u32 *buf)
+{
+       u32 i;
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_DEBUG,
+                  "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n",
+                  size_in_dwords, grc_addr);
+       for (i = 0; i < size_in_dwords; i++, grc_addr += BYTES_IN_DWORD)
+               buf[i] = qed_rd(p_hwfn, p_ptt, grc_addr);
+}
+
+/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified
+ * buffer.
+ */
+static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
+                                              struct qed_ptt *p_ptt,
+                                              u32 nvram_offset_in_bytes,
+                                              u32 size_in_bytes, u32 *buf)
+{
+       u8 *byte_buf = (u8 *)buf;
+       u8 modules_num, i;
+       u32 signature;
+
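+       /* The meta image starts with a signature, followed by the number of
+        * modules and a length-prefixed record (presumably the module name)
+        * per module, followed by a second signature.
+        */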
+       /* Read meta data from NVRAM */
+       enum dbg_status status = qed_nvram_read(p_hwfn,
+                                               p_ptt,
+                                               nvram_offset_in_bytes,
+                                               size_in_bytes,
+                                               buf);
+
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       /* Extract and check first signature */
+       signature = qed_read_unaligned_dword(byte_buf);
+       byte_buf += sizeof(u32);
+       if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+               return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+       /* Extract number of modules */
+       modules_num = *(byte_buf++);
+
+       /* Skip all modules */
+       for (i = 0; i < modules_num; i++) {
+               u8 module_len = *(byte_buf++);
+
+               byte_buf += module_len;
+       }
+
+       /* Extract and check second signature */
+       signature = qed_read_unaligned_dword(byte_buf);
+       byte_buf += sizeof(u32);
+       if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+               return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+       return DBG_STATUS_OK;
+}
+
+/* Dump MCP Trace */
+enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt,
+                                  u32 *dump_buf,
+                                  bool dump, u32 *num_dumped_dwords)
+{
+       u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords;
+       u32 trace_meta_size_dwords, running_bundle_id, offset = 0;
+       u32 trace_meta_offset_bytes, trace_meta_size_bytes;
+       enum dbg_status status;
+       bool halted = false;
+
+       *num_dumped_dwords = 0;
+
+       /* Get trace data info */
+       status = qed_mcp_trace_get_data_info(p_hwfn,
+                                            p_ptt,
+                                            &trace_data_grc_addr,
+                                            &trace_data_size_bytes);
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "mcp-trace");
+
+       /* Halt MCP while reading from scratchpad so the read data will be
+        * consistent. If halt fails, MCP trace is taken anyway, with a small
+        * risk that it may be corrupt.
+        */
+       if (dump) {
+               halted = !qed_mcp_halt(p_hwfn, p_ptt);
+               if (!halted)
+                       DP_NOTICE(p_hwfn, "MCP halt failed!\n");
+       }
+
+       /* Find trace data size */
+       trace_data_size_dwords =
+               DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace),
+                            BYTES_IN_DWORD);
+
+       /* Dump trace data section header and param */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "mcp_trace_data", 1);
+       offset += qed_dump_num_param(dump_buf + offset,
+                                    dump, "size", trace_data_size_dwords);
+
+       /* Read trace data from scratchpad into dump buffer */
+       if (dump)
+               qed_mcp_trace_read_data(p_hwfn,
+                                       p_ptt,
+                                       trace_data_grc_addr,
+                                       trace_data_size_dwords,
+                                       dump_buf + offset);
+       offset += trace_data_size_dwords;
+
+       /* Resume MCP (only if halt succeeded) */
+       if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0)
+               DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
+
+       /* Dump trace meta section header */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "mcp_trace_meta", 1);
+
+       /* Read trace meta info */
+       status = qed_mcp_trace_get_meta_info(p_hwfn,
+                                            p_ptt,
+                                            trace_data_size_bytes,
+                                            &running_bundle_id,
+                                            &trace_meta_offset_bytes,
+                                            &trace_meta_size_bytes);
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       /* Dump trace meta size param (trace_meta_size_bytes is always
+        * dword-aligned).
+        */
+       trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes);
+       offset += qed_dump_num_param(dump_buf + offset, dump, "size",
+                                    trace_meta_size_dwords);
+
+       /* Read trace meta image into dump buffer */
+       if (dump) {
+               status = qed_mcp_trace_read_meta(p_hwfn,
+                                               p_ptt,
+                                               trace_meta_offset_bytes,
+                                               trace_meta_size_bytes,
+                                               dump_buf + offset);
+               if (status != DBG_STATUS_OK)
+                       return status;
+       }
+
+       offset += trace_meta_size_dwords;
+
+       *num_dumped_dwords = offset;
+
+       return DBG_STATUS_OK;
+}
+
+/* Dump GRC FIFO */
+enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 u32 *dump_buf,
+                                 bool dump, u32 *num_dumped_dwords)
+{
+       u32 offset = 0, dwords_read, size_param_offset;
+       bool fifo_has_data;
+
+       *num_dumped_dwords = 0;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "reg-fifo");
+
+       /* Dump fifo data section header and param. The size param is 0 for now,
+        * and is overwritten after reading the FIFO.
+        */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "reg_fifo_data", 1);
+       size_param_offset = offset;
+       offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+       if (!dump) {
+               /* FIFO max size is REG_FIFO_DEPTH_DWORDS. There is no way to
+                * test how much data is available, except for reading it.
+                */
+               offset += REG_FIFO_DEPTH_DWORDS;
+               *num_dumped_dwords = offset;
+               return DBG_STATUS_OK;
+       }
+
+       fifo_has_data = qed_rd(p_hwfn, p_ptt,
+                              GRC_REG_TRACE_FIFO_VALID_DATA) > 0;
+
+       /* Pull available data from fifo. Use DMAE since this is widebus memory
+        * and must be accessed atomically. Test for dwords_read not passing
+        * buffer size since more entries could be added to the buffer as we are
+        * emptying it.
+        */
+       for (dwords_read = 0;
+            fifo_has_data && dwords_read < REG_FIFO_DEPTH_DWORDS;
+            dwords_read += REG_FIFO_ELEMENT_DWORDS, offset +=
+            REG_FIFO_ELEMENT_DWORDS) {
+               if (qed_dmae_grc2host(p_hwfn, p_ptt, GRC_REG_TRACE_FIFO,
+                                     (u64)(uintptr_t)(&dump_buf[offset]),
+                                     REG_FIFO_ELEMENT_DWORDS, 0))
+                       return DBG_STATUS_DMAE_FAILED;
+               fifo_has_data = qed_rd(p_hwfn, p_ptt,
+                                      GRC_REG_TRACE_FIFO_VALID_DATA) > 0;
+       }
+
+       qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+                          dwords_read);
+
+       *num_dumped_dwords = offset;
+       return DBG_STATUS_OK;
+}
+
+/* Dump IGU FIFO */
+enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 u32 *dump_buf,
+                                 bool dump, u32 *num_dumped_dwords)
+{
+       u32 offset = 0, dwords_read, size_param_offset;
+       bool fifo_has_data;
+
+       *num_dumped_dwords = 0;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "igu-fifo");
+
+       /* Dump FIFO data section header and param. The size param is 0 for now,
+        * and is overwritten after reading the FIFO.
+        */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "igu_fifo_data", 1);
+       size_param_offset = offset;
+       offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+       if (!dump) {
+               /* FIFO max size is IGU_FIFO_DEPTH_DWORDS. There is no way to
+                * test how much data is available, except by reading it.
+                */
+               offset += IGU_FIFO_DEPTH_DWORDS;
+               *num_dumped_dwords = offset;
+               return DBG_STATUS_OK;
+       }
+
+       fifo_has_data = qed_rd(p_hwfn, p_ptt,
+                              IGU_REG_ERROR_HANDLING_DATA_VALID) > 0;
+
+       /* Pull available data from the FIFO. Use DMAE since this is widebus
+        * memory and must be accessed atomically. Make sure dwords_read does
+        * not pass the buffer size, since more entries could be added to the
+        * buffer while we are emptying it.
+        */
+       for (dwords_read = 0;
+            fifo_has_data && dwords_read < IGU_FIFO_DEPTH_DWORDS;
+            dwords_read += IGU_FIFO_ELEMENT_DWORDS, offset +=
+            IGU_FIFO_ELEMENT_DWORDS) {
+               if (qed_dmae_grc2host(p_hwfn, p_ptt,
+                                     IGU_REG_ERROR_HANDLING_MEMORY,
+                                     (u64)(uintptr_t)(&dump_buf[offset]),
+                                     IGU_FIFO_ELEMENT_DWORDS, 0))
+                       return DBG_STATUS_DMAE_FAILED;
+               fifo_has_data = qed_rd(p_hwfn, p_ptt,
+                                      IGU_REG_ERROR_HANDLING_DATA_VALID) > 0;
+       }
+
+       qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+                          dwords_read);
+
+       *num_dumped_dwords = offset;
+       return DBG_STATUS_OK;
+}
+
+/* Protection Override dump */
+enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
+                                            struct qed_ptt *p_ptt,
+                                            u32 *dump_buf,
+                                            bool dump, u32 *num_dumped_dwords)
+{
+       u32 offset = 0, size_param_offset, override_window_dwords;
+
+       *num_dumped_dwords = 0;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "protection-override");
+
+       /* Dump data section header and param. The size param is 0 for now, and
+        * is overwritten after reading the data.
+        */
+       offset += qed_dump_section_hdr(dump_buf + offset,
+                                      dump, "protection_override_data", 1);
+       size_param_offset = offset;
+       offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+       if (!dump) {
+               offset += PROTECTION_OVERRIDE_DEPTH_DWORDS;
+               *num_dumped_dwords = offset;
+               return DBG_STATUS_OK;
+       }
+
+       /* Add override window info to buffer */
+       override_window_dwords =
+               qed_rd(p_hwfn, p_ptt,
+                      GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) *
+                      PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+       if (qed_dmae_grc2host(p_hwfn, p_ptt,
+                             GRC_REG_PROTECTION_OVERRIDE_WINDOW,
+                             (u64)(uintptr_t)(dump_buf + offset),
+                             override_window_dwords, 0))
+               return DBG_STATUS_DMAE_FAILED;
+       offset += override_window_dwords;
+       qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+                          override_window_dwords);
+
+       *num_dumped_dwords = offset;
+       return DBG_STATUS_OK;
+}
+
+/* Performs FW Asserts Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+                              struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+       char storm_letter_str[2] = "?";
+       struct fw_info fw_info;
+       u32 offset = 0, i;
+       u8 storm_id;
+
+       /* Dump global params */
+       offset += qed_dump_common_global_params(p_hwfn,
+                                               p_ptt,
+                                               dump_buf + offset, dump, 1);
+       offset += qed_dump_str_param(dump_buf + offset,
+                                    dump, "dump-type", "fw-asserts");
+       for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+               u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx,
+                       last_list_idx, element_addr;
+
+               if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id])
+                       continue;
+
+               /* Read FW info for the current Storm */
+               qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info);
+
+               /* Dump FW Asserts section header and params */
+               storm_letter_str[0] = s_storm_defs[storm_id].letter;
+               offset += qed_dump_section_hdr(dump_buf + offset, dump,
+                                              "fw_asserts", 2);
+               offset += qed_dump_str_param(dump_buf + offset, dump, "storm",
+                                            storm_letter_str);
+               offset += qed_dump_num_param(dump_buf + offset, dump, "size",
+                                            fw_info.fw_asserts_section.
+                                            list_element_dword_size);
+
+               if (!dump) {
+                       offset += fw_info.fw_asserts_section.
+                                 list_element_dword_size;
+                       continue;
+               }
+
+               /* Read and dump FW Asserts data */
+               fw_asserts_section_addr =
+                       s_storm_defs[storm_id].sem_fast_mem_addr +
+                       SEM_FAST_REG_INT_RAM +
+                       RAM_LINES_TO_BYTES(fw_info.fw_asserts_section.
+                                          section_ram_line_offset);
+               next_list_idx_addr =
+                       fw_asserts_section_addr +
+                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+                                       list_next_index_dword_offset);
+               next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr);
+               last_list_idx = (next_list_idx > 0
+                                ? next_list_idx
+                                : fw_info.fw_asserts_section.list_num_elements)
+                               - 1;
+               element_addr =
+                       fw_asserts_section_addr +
+                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+                                       list_dword_offset) +
+                       last_list_idx *
+                       DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+                                       list_element_dword_size);
+               for (i = 0;
+                    i < fw_info.fw_asserts_section.list_element_dword_size;
+                    i++, offset++, element_addr += BYTES_IN_DWORD)
+                       dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr);
+       }
+
+       /* Dump last section */
+       offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
+       return offset;
+}
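+
+/* Editor's illustrative sketch (not part of the patch): the asserts section
+ * is a cyclic list, so the most recently written element sits one slot
+ * behind next_list_idx, wrapping to the last slot when next_list_idx is 0.
+ * A hypothetical helper capturing the same index math as above:
+ */
+static inline u32 qed_example_last_list_idx(u32 next_list_idx,
+                                           u32 list_num_elements)
+{
+       /* e.g. next_list_idx = 0, 4 elements -> 3; next_list_idx = 2 -> 1 */
+       return (next_list_idx > 0 ? next_list_idx : list_num_elements) - 1;
+}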
+
+/***************************** Public Functions *******************************/
+
+enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr)
+{
+       /* Convert binary data to debug arrays */
+       u32 num_of_buffers = *(u32 *)bin_ptr;
+       struct bin_buffer_hdr *buf_array;
+       u8 buf_id;
+
+       buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
+
+       for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+               s_dbg_arrays[buf_id].ptr =
+                   (u32 *)(bin_ptr + buf_array[buf_id].offset);
+               s_dbg_arrays[buf_id].size_in_dwords =
+                   BYTES_TO_DWORDS(buf_array[buf_id].length);
+       }
+
+       return DBG_STATUS_OK;
+}
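+
+/* Editor's illustrative sketch (not part of the patch): a bounds check one
+ * might run before trusting the image parsed above. It assumes only the
+ * offset/length fields of struct bin_buffer_hdr that the function itself
+ * uses; image_size is the total size of the binary image in bytes.
+ */
+static bool qed_example_bin_image_fits(const u8 *bin_ptr, u32 image_size)
+{
+       u32 num_of_buffers = *(const u32 *)bin_ptr;
+       const struct bin_buffer_hdr *buf_array =
+           (const struct bin_buffer_hdr *)((const u32 *)bin_ptr + 1);
+       u8 buf_id;
+
+       for (buf_id = 0; buf_id < num_of_buffers; buf_id++)
+               if (buf_array[buf_id].offset + buf_array[buf_id].length >
+                   image_size)
+                       return false;
+
+       return true;
+}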
+
+enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                             struct qed_ptt *p_ptt,
+                                             u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)
+               return DBG_STATUS_DBG_ARRAY_NOT_SET;
+       return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt,
+                                u32 *dump_buf,
+                                u32 buf_size_in_dwords,
+                                u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt,
+                                              &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* GRC Dump */
+       status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords);
+
+       /* Clear all GRC params */
+       qed_dbg_grc_clear_params(p_hwfn);
+       return status;
+}
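+
+/* Editor's illustrative sketch (not part of the patch): the intended calling
+ * pattern for the two functions above - query the needed size, allocate,
+ * then dump. The wrapper name is hypothetical; vzalloc()/vfree() are the
+ * usual kernel allocators for buffers this large.
+ */
+static enum dbg_status qed_example_collect_grc(struct qed_hwfn *p_hwfn,
+                                              struct qed_ptt *p_ptt)
+{
+       u32 buf_size_dwords, num_dumped_dwords;
+       enum dbg_status status;
+       u32 *dump_buf;
+
+       status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt,
+                                              &buf_size_dwords);
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       dump_buf = vzalloc(buf_size_dwords * sizeof(u32));
+       if (!dump_buf)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       status = qed_dbg_grc_dump(p_hwfn, p_ptt, dump_buf,
+                                 buf_size_dwords, &num_dumped_dwords);
+
+       /* ... hand dump_buf / num_dumped_dwords to the consumer ... */
+
+       vfree(dump_buf);
+       return status;
+}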
+
+enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+       struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr)
+               return DBG_STATUS_DBG_ARRAY_NOT_SET;
+       if (!dev_data->idle_chk.buf_size_set) {
+               dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn,
+                                                               p_ptt,
+                                                               NULL, false);
+               dev_data->idle_chk.buf_size_set = true;
+       }
+
+       *buf_size = dev_data->idle_chk.buf_size;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt,
+                                                   &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+       /* Idle Check Dump */
+       *num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true);
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                   struct qed_ptt *p_ptt,
+                                                   u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+                                      struct qed_ptt *p_ptt,
+                                      u32 *dump_buf,
+                                      u32 buf_size_in_dwords,
+                                      u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
+                                               &needed_buf_size_in_dwords);
+
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+       /* Perform dump */
+       return qed_mcp_trace_dump(p_hwfn,
+                                 p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt,
+                                                   &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+       return qed_reg_fifo_dump(p_hwfn,
+                                p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt,
+                                                   &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+       return qed_igu_fifo_dump(p_hwfn,
+                                p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status
+qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                             struct qed_ptt *p_ptt,
+                                             u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       return qed_protection_override_dump(p_hwfn,
+                                           p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+                                                struct qed_ptt *p_ptt,
+                                                u32 *dump_buf,
+                                                u32 buf_size_in_dwords,
+                                                u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt,
+                                               &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+       return qed_protection_override_dump(p_hwfn,
+                                           p_ptt,
+                                           dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                    struct qed_ptt *p_ptt,
+                                                    u32 *buf_size)
+{
+       enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+       *buf_size = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       /* Update reset state */
+       qed_update_blocks_reset_state(p_hwfn, p_ptt);
+       *buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false);
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt,
+                                       u32 *dump_buf,
+                                       u32 buf_size_in_dwords,
+                                       u32 *num_dumped_dwords)
+{
+       u32 needed_buf_size_in_dwords;
+       enum dbg_status status;
+
+       status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt,
+                                               &needed_buf_size_in_dwords);
+
+       *num_dumped_dwords = 0;
+       if (status != DBG_STATUS_OK)
+               return status;
+       if (buf_size_in_dwords < needed_buf_size_in_dwords)
+               return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+       *num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true);
+       return DBG_STATUS_OK;
+}
+
+/******************************* Data Types **********************************/
+
+struct mcp_trace_format {
+       u32 data;
+#define MCP_TRACE_FORMAT_MODULE_MASK   0x0000ffff
+#define MCP_TRACE_FORMAT_MODULE_SHIFT  0
+#define MCP_TRACE_FORMAT_LEVEL_MASK    0x00030000
+#define MCP_TRACE_FORMAT_LEVEL_SHIFT   16
+#define MCP_TRACE_FORMAT_P1_SIZE_MASK  0x000c0000
+#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT 18
+#define MCP_TRACE_FORMAT_P2_SIZE_MASK  0x00300000
+#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT 20
+#define MCP_TRACE_FORMAT_P3_SIZE_MASK  0x00c00000
+#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT 22
+#define MCP_TRACE_FORMAT_LEN_MASK      0xff000000
+#define MCP_TRACE_FORMAT_LEN_SHIFT     24
+       char *format_str;
+};
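+
+/* Editor's illustrative sketch (not part of the patch): each packed field
+ * above is extracted with its mask/shift pair, e.g. the trace level that is
+ * later used to index s_mcp_trace_level_str:
+ */
+static inline u32 qed_example_mcp_trace_level(u32 data)
+{
+       return (data & MCP_TRACE_FORMAT_LEVEL_MASK) >>
+              MCP_TRACE_FORMAT_LEVEL_SHIFT;
+}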
+
+struct mcp_trace_meta {
+       u32 modules_num;
+       char **modules;
+       u32 formats_num;
+       struct mcp_trace_format *formats;
+};
+
+/* Reg fifo element */
+struct reg_fifo_element {
+       u64 data;
+#define REG_FIFO_ELEMENT_ADDRESS_SHIFT         0
+#define REG_FIFO_ELEMENT_ADDRESS_MASK          0x7fffff
+#define REG_FIFO_ELEMENT_ACCESS_SHIFT          23
+#define REG_FIFO_ELEMENT_ACCESS_MASK           0x1
+#define REG_FIFO_ELEMENT_PF_SHIFT              24
+#define REG_FIFO_ELEMENT_PF_MASK               0xf
+#define REG_FIFO_ELEMENT_VF_SHIFT              28
+#define REG_FIFO_ELEMENT_VF_MASK               0xff
+#define REG_FIFO_ELEMENT_PORT_SHIFT            36
+#define REG_FIFO_ELEMENT_PORT_MASK             0x3
+#define REG_FIFO_ELEMENT_PRIVILEGE_SHIFT       38
+#define REG_FIFO_ELEMENT_PRIVILEGE_MASK                0x3
+#define REG_FIFO_ELEMENT_PROTECTION_SHIFT      40
+#define REG_FIFO_ELEMENT_PROTECTION_MASK       0x7
+#define REG_FIFO_ELEMENT_MASTER_SHIFT          43
+#define REG_FIFO_ELEMENT_MASTER_MASK           0xf
+#define REG_FIFO_ELEMENT_ERROR_SHIFT           47
+#define REG_FIFO_ELEMENT_ERROR_MASK            0x1f
+};
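+
+/* Editor's illustrative sketch (not part of the patch): unlike the MCP trace
+ * masks, the masks above are unshifted, so a field is read as
+ * (data >> SHIFT) & MASK. The address field counts dwords; presumably the
+ * parsing code scales it by REG_FIFO_ELEMENT_ADDR_FACTOR (defined further
+ * below) to obtain a GRC byte address.
+ */
+static inline u32 qed_example_reg_fifo_address(struct reg_fifo_element *el)
+{
+       return (u32)((el->data >> REG_FIFO_ELEMENT_ADDRESS_SHIFT) &
+                    REG_FIFO_ELEMENT_ADDRESS_MASK);
+}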
+
+/* IGU fifo element */
+struct igu_fifo_element {
+       u32 dword0;
+#define IGU_FIFO_ELEMENT_DWORD0_FID_SHIFT              0
+#define IGU_FIFO_ELEMENT_DWORD0_FID_MASK               0xff
+#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_SHIFT            8
+#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_MASK             0x1
+#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_SHIFT           9
+#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_MASK            0xf
+#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_SHIFT         13
+#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_MASK          0xf
+#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_SHIFT         17
+#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_MASK          0x7fff
+       u32 dword1;
+       u32 dword2;
+#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_SHIFT       0
+#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_MASK                0x1
+#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_SHIFT         1
+#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_MASK          0xffffffff
+       u32 reserved;
+};
+
+struct igu_fifo_wr_data {
+       u32 data;
+#define IGU_FIFO_WR_DATA_PROD_CONS_SHIFT               0
+#define IGU_FIFO_WR_DATA_PROD_CONS_MASK                        0xffffff
+#define IGU_FIFO_WR_DATA_UPDATE_FLAG_SHIFT             24
+#define IGU_FIFO_WR_DATA_UPDATE_FLAG_MASK              0x1
+#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_SHIFT       25
+#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_MASK                0x3
+#define IGU_FIFO_WR_DATA_SEGMENT_SHIFT                 27
+#define IGU_FIFO_WR_DATA_SEGMENT_MASK                  0x1
+#define IGU_FIFO_WR_DATA_TIMER_MASK_SHIFT              28
+#define IGU_FIFO_WR_DATA_TIMER_MASK_MASK               0x1
+#define IGU_FIFO_WR_DATA_CMD_TYPE_SHIFT                        31
+#define IGU_FIFO_WR_DATA_CMD_TYPE_MASK                 0x1
+};
+
+struct igu_fifo_cleanup_wr_data {
+       u32 data;
+#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_SHIFT                0
+#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_MASK         0x7ffffff
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_SHIFT     27
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_MASK      0x1
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_SHIFT    28
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_MASK     0x7
+#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_SHIFT                31
+#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_MASK         0x1
+};
+
+/* Protection override element */
+struct protection_override_element {
+       u64 data;
+#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_SHIFT              0
+#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_MASK               0x7fffff
+#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_SHIFT          23
+#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_MASK           0xffffff
+#define PROTECTION_OVERRIDE_ELEMENT_READ_SHIFT                 47
+#define PROTECTION_OVERRIDE_ELEMENT_READ_MASK                  0x1
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_SHIFT                        48
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_MASK                 0x1
+#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_SHIFT      49
+#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_MASK       0x7
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_SHIFT     52
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_MASK      0x7
+};
+
+enum igu_fifo_sources {
+       IGU_SRC_PXP0,
+       IGU_SRC_PXP1,
+       IGU_SRC_PXP2,
+       IGU_SRC_PXP3,
+       IGU_SRC_PXP4,
+       IGU_SRC_PXP5,
+       IGU_SRC_PXP6,
+       IGU_SRC_PXP7,
+       IGU_SRC_CAU,
+       IGU_SRC_ATTN,
+       IGU_SRC_GRC
+};
+
+enum igu_fifo_addr_types {
+       IGU_ADDR_TYPE_MSIX_MEM,
+       IGU_ADDR_TYPE_WRITE_PBA,
+       IGU_ADDR_TYPE_WRITE_INT_ACK,
+       IGU_ADDR_TYPE_WRITE_ATTN_BITS,
+       IGU_ADDR_TYPE_READ_INT,
+       IGU_ADDR_TYPE_WRITE_PROD_UPDATE,
+       IGU_ADDR_TYPE_RESERVED
+};
+
+struct igu_fifo_addr_data {
+       u16 start_addr;
+       u16 end_addr;
+       char *desc;
+       char *vf_desc;
+       enum igu_fifo_addr_types type;
+};
+
+/******************************** Constants **********************************/
+
+#define MAX_MSG_LEN                            1024
+#define MCP_TRACE_MAX_MODULE_LEN               8
+#define MCP_TRACE_FORMAT_MAX_PARAMS            3
+#define MCP_TRACE_FORMAT_PARAM_WIDTH \
+       (MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT)
+#define REG_FIFO_ELEMENT_ADDR_FACTOR           4
+#define REG_FIFO_ELEMENT_IS_PF_VF_VAL          127
+#define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR        4
+
+/********************************* Macros ************************************/
+
+#define BYTES_TO_DWORDS(bytes)                 ((bytes) / BYTES_IN_DWORD)
+
+/***************************** Constant Arrays *******************************/
+
+/* Status string array */
+static const char * const s_status_str[] = {
+       "Operation completed successfully",
+       "Debug application version wasn't set",
+       "Unsupported debug application version",
+       "The debug block wasn't reset since the last recording",
+       "Invalid arguments",
+       "The debug output was already set",
+       "Invalid PCI buffer size",
+       "PCI buffer allocation failed",
+       "A PCI buffer wasn't allocated",
+       "Too many inputs were enabled. Enabled less inputs, or set 'unifyInputs' to true",
+       "GRC/Timestamp input overlap in cycle dword 0",
+       "Cannot record Storm data since the entire recording cycle is used by HW",
+       "The Storm was already enabled",
+       "The specified Storm wasn't enabled",
+       "The block was already enabled",
+       "The specified block wasn't enabled",
+       "No input was enabled for recording",
+       "Filters and triggers are not allowed when recording in 64b units",
+       "The filter was already enabled",
+       "The trigger was already enabled",
+       "The trigger wasn't enabled",
+       "A constraint can be added only after a filter was enabled or a trigger state was added",
+       "Cannot add more than 3 trigger states",
+       "Cannot add more than 4 constraints per filter or trigger state",
+       "The recording wasn't started",
+       "A trigger was configured, but it didn't trigger",
+       "No data was recorded",
+       "Dump buffer is too small",
+       "Dumped data is not aligned to chunks",
+       "Unknown chip",
+       "Failed allocating virtual memory",
+       "The input block is in reset",
+       "Invalid MCP trace signature found in NVRAM",
+       "Invalid bundle ID found in NVRAM",
+       "Failed getting NVRAM image",
+       "NVRAM image is not dword-aligned",
+       "Failed reading from NVRAM",
+       "Idle check parsing failed",
+       "MCP Trace data is corrupt",
+       "Dump doesn't contain meta data - it must be provided in an image file",
+       "Failed to halt MCP",
+       "Failed to resume MCP after halt",
+       "DMAE transaction failed",
+       "Failed to empty SEMI sync FIFO",
+       "IGU FIFO data is corrupt",
+       "MCP failed to mask parities",
+       "FW Asserts parsing failed",
+       "GRC FIFO data is corrupt",
+       "Protection Override data is corrupt",
+       "Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)",
+       "When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)"
+};
+
+/* Idle check severity names array */
+static const char * const s_idle_chk_severity_str[] = {
+       "Error",
+       "Error if no traffic",
+       "Warning"
+};
+
+/* MCP Trace level names array */
+static const char * const s_mcp_trace_level_str[] = {
+       "ERROR",
+       "TRACE",
+       "DEBUG"
+};
+
+/* Parsing strings */
+static const char * const s_access_strs[] = {
+       "read",
+       "write"
+};
+
+static const char * const s_privilege_strs[] = {
+       "VF",
+       "PDA",
+       "HV",
+       "UA"
+};
+
+static const char * const s_protection_strs[] = {
+       "(default)",
+       "(default)",
+       "(default)",
+       "(default)",
+       "override VF",
+       "override PDA",
+       "override HV",
+       "override UA"
+};
+
+static const char * const s_master_strs[] = {
+       "???",
+       "pxp",
+       "mcp",
+       "msdm",
+       "psdm",
+       "ysdm",
+       "usdm",
+       "tsdm",
+       "xsdm",
+       "dbu",
+       "dmae",
+       "???",
+       "???",
+       "???",
+       "???",
+       "???"
+};
+
+static const char * const s_reg_fifo_error_strs[] = {
+       "grc timeout",
+       "address doesn't belong to any block",
+       "reserved address in block or write to read-only address",
+       "privilege/protection mismatch",
+       "path isolation error"
+};
+
+static const char * const s_igu_fifo_source_strs[] = {
+       "TSTORM",
+       "MSTORM",
+       "USTORM",
+       "XSTORM",
+       "YSTORM",
+       "PSTORM",
+       "PCIE",
+       "NIG_QM_PBF",
+       "CAU",
+       "ATTN",
+       "GRC",
+};
+
+static const char * const s_igu_fifo_error_strs[] = {
+       "no error",
+       "length error",
+       "function disabled",
+       "VF sent command to attnetion address",
+       "host sent prod update command",
+       "read of during interrupt register while in MIMD mode",
+       "access to PXP BAR reserved address",
+       "producer update command to attention index",
+       "unknown error",
+       "SB index not valid",
+       "SB relative index and FID not found",
+       "FID not match",
+       "command with error flag asserted (PCI error or CAU discard)",
+       "VF sent cleanup and RF cleanup is disabled",
+       "cleanup command on type bigger than 4"
+};
+
+/* IGU FIFO address data */
+static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
+       {0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM},
+       {0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+       {0x200, 0x200, "Write PBA[0:63]", NULL, IGU_ADDR_TYPE_WRITE_PBA},
+       {0x201, 0x201, "Write PBA[64:127]", "reserved",
+        IGU_ADDR_TYPE_WRITE_PBA},
+       {0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA},
+       {0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+       {0x400, 0x5ef, "Write interrupt acknowledgment", NULL,
+        IGU_ADDR_TYPE_WRITE_INT_ACK},
+       {0x5f0, 0x5f0, "Attention bits update", NULL,
+        IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+       {0x5f1, 0x5f1, "Attention bits set", NULL,
+        IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+       {0x5f2, 0x5f2, "Attention bits clear", NULL,
+        IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+       {0x5f3, 0x5f3, "Read interrupt 0:63 with mask", NULL,
+        IGU_ADDR_TYPE_READ_INT},
+       {0x5f4, 0x5f4, "Read interrupt 0:31 with mask", NULL,
+        IGU_ADDR_TYPE_READ_INT},
+       {0x5f5, 0x5f5, "Read interrupt 32:63 with mask", NULL,
+        IGU_ADDR_TYPE_READ_INT},
+       {0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL,
+        IGU_ADDR_TYPE_READ_INT},
+       {0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+       {0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE}
+};
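+
+/* Editor's illustrative sketch (not part of the patch): a command address
+ * taken from an IGU FIFO element maps to the first entry in the table above
+ * whose [start_addr, end_addr] range contains it.
+ */
+static const struct igu_fifo_addr_data *
+qed_example_find_igu_addr_data(u16 cmd_addr)
+{
+       u8 i;
+
+       for (i = 0; i < ARRAY_SIZE(s_igu_fifo_addr_data); i++)
+               if (cmd_addr >= s_igu_fifo_addr_data[i].start_addr &&
+                   cmd_addr <= s_igu_fifo_addr_data[i].end_addr)
+                       return &s_igu_fifo_addr_data[i];
+
+       return NULL;
+}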
+
+/******************************** Variables **********************************/
+
+/* MCP Trace meta data - used in case the dump doesn't contain the meta data
+ * (e.g. due to no NVRAM access).
+ */
+static struct dbg_array s_mcp_trace_meta = { NULL, 0 };
+
+/* Temporary buffer, used for print size calculations */
+static char s_temp_buf[MAX_MSG_LEN];
+
+/***************************** Public Functions *******************************/
+
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
+{
+       /* Convert binary data to debug arrays */
+       u32 num_of_buffers = *(u32 *)bin_ptr;
+       struct bin_buffer_hdr *buf_array;
+       u8 buf_id;
+
+       buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
+
+       for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+               s_dbg_arrays[buf_id].ptr =
+                   (u32 *)(bin_ptr + buf_array[buf_id].offset);
+               s_dbg_arrays[buf_id].size_in_dwords =
+                   BYTES_TO_DWORDS(buf_array[buf_id].length);
+       }
+
+       return DBG_STATUS_OK;
+}
+
+static u32 qed_cyclic_add(u32 a, u32 b, u32 size)
+{
+       return (a + b) % size;
+}
+
+static u32 qed_cyclic_sub(u32 a, u32 b, u32 size)
+{
+       return (size + a - b) % size;
+}
+
+/* Reads the specified number of bytes from the specified cyclic buffer (up to 4
+ * bytes) and returns them as a dword value. The specified buffer offset is
+ * updated.
+ */
+static u32 qed_read_from_cyclic_buf(void *buf,
+                                   u32 *offset,
+                                   u32 buf_size, u8 num_bytes_to_read)
+{
+       u8 *bytes_buf = (u8 *)buf;
+       u8 *val_ptr;
+       u32 val = 0;
+       u8 i;
+
+       val_ptr = (u8 *)&val;
+
+       for (i = 0; i < num_bytes_to_read; i++) {
+               val_ptr[i] = bytes_buf[*offset];
+               *offset = qed_cyclic_add(*offset, 1, buf_size);
+       }
+
+       return val;
+}
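+
+/* Editor's worked example: with buf_size = 8, reading 4 bytes starting at
+ * *offset = 6 consumes bytes 6, 7, 0 and 1 - qed_cyclic_add() wraps the
+ * index modulo the buffer size - and leaves *offset = 2. On a little-endian
+ * host, the bytes land in the returned dword least-significant first.
+ */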
+
+/* Reads and returns the next byte from the specified buffer.
+ * The specified buffer offset is updated.
+ */
+static u8 qed_read_byte_from_buf(void *buf, u32 *offset)
+{
+       return ((u8 *)buf)[(*offset)++];
+}
+
+/* Reads and returns the next dword from the specified buffer.
+ * The specified buffer offset is updated.
+ */
+static u32 qed_read_dword_from_buf(void *buf, u32 *offset)
+{
+       u32 dword_val = *(u32 *)&((u8 *)buf)[*offset];
+
+       *offset += 4;
+       return dword_val;
+}
+
+/* Reads the next string from the specified buffer, and copies it to the
+ * specified pointer. The specified buffer offset is updated.
+ */
+static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest)
+{
+       const char *source_str = &((const char *)buf)[*offset];
+
+       strncpy(dest, source_str, size);
+       dest[size - 1] = '\0';
+       *offset += size;
+}
+
+/* Returns a pointer to the specified offset (in bytes) of the specified buffer.
+ * If the specified buffer is NULL, a temporary buffer pointer is returned.
+ */
+static char *qed_get_buf_ptr(void *buf, u32 offset)
+{
+       return buf ? (char *)buf + offset : s_temp_buf;
+}
+
+/* Reads a param from the specified buffer. Returns the number of dwords read.
+ * If the returned param_str_val is NULL, the param is numeric and its value
+ * is returned in param_num_val.
+ * Otherwise, the param is a string and its pointer is returned in
+ * param_str_val.
+ */
+static u32 qed_read_param(u32 *dump_buf,
+                         const char **param_name,
+                         const char **param_str_val, u32 *param_num_val)
+{
+       char *char_buf = (char *)dump_buf;
+       u32 offset = 0; /* In bytes */
+
+       /* Extract param name */
+       *param_name = char_buf;
+       offset += strlen(*param_name) + 1;
+
+       /* Check param type */
+       if (*(char_buf + offset++)) {
+               /* String param */
+               *param_str_val = char_buf + offset;
+               offset += strlen(*param_str_val) + 1;
+               if (offset & 0x3)
+                       offset += (4 - (offset & 0x3));
+       } else {
+               /* Numeric param */
+               *param_str_val = NULL;
+               if (offset & 0x3)
+                       offset += (4 - (offset & 0x3));
+               *param_num_val = *(u32 *)(char_buf + offset);
+               offset += 4;
+       }
+
+       return offset / 4;
+}
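+
+/* Editor's worked example of the encoding parsed above: a numeric param
+ * named "size" with value 5 is laid out as
+ *
+ *   bytes 0..4  : "size\0"     (param name + terminator)
+ *   byte  5     : 0x00         (type byte: 0 = numeric)
+ *   bytes 6..7  : padding up to a dword boundary
+ *   bytes 8..11 : 0x00000005   (u32 value)
+ *
+ * i.e. 12 bytes in total, so qed_read_param() returns 3 dwords.
+ */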
+
+/* Reads a section header from the specified buffer.
+ * Returns the number of dwords read.
+ */
+static u32 qed_read_section_hdr(u32 *dump_buf,
+                               const char **section_name,
+                               u32 *num_section_params)
+{
+       const char *param_str_val;
+
+       return qed_read_param(dump_buf,
+                             section_name, &param_str_val, num_section_params);
+}
+
+/* Reads section params from the specified buffer and prints them to the results
+ * buffer. Returns the number of dwords read.
+ */
+static u32 qed_print_section_params(u32 *dump_buf,
+                                   u32 num_section_params,
+                                   char *results_buf, u32 *num_chars_printed)
+{
+       u32 i, dump_offset = 0, results_offset = 0;
+
+       for (i = 0; i < num_section_params; i++) {
+               const char *param_name;
+               const char *param_str_val;
+               u32 param_num_val = 0;
+
+               dump_offset += qed_read_param(dump_buf + dump_offset,
+                                             &param_name,
+                                             &param_str_val, &param_num_val);
+               if (param_str_val)
+                       /* String param */
+                       results_offset +=
+                               sprintf(qed_get_buf_ptr(results_buf,
+                                                       results_offset),
+                                       "%s: %s\n", param_name, param_str_val);
+               else if (strcmp(param_name, "fw-timestamp"))
+                       /* Numeric param */
+                       results_offset +=
+                               sprintf(qed_get_buf_ptr(results_buf,
+                                                       results_offset),
+                                       "%s: %d\n", param_name, param_num_val);
+       }
+
+       results_offset +=
+           sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+       *num_chars_printed = results_offset;
+       return dump_offset;
+}
+
+const char *qed_dbg_get_status_str(enum dbg_status status)
+{
+       return (status <
+               MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status";
+}
+
+/* Parses the idle check rules and returns the number of characters printed.
+ * In case of parsing error, returns 0.
+ */
+static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
+                                        u32 *dump_buf,
+                                        u32 *dump_buf_end,
+                                        u32 num_rules,
+                                        bool print_fw_idle_chk,
+                                        char *results_buf,
+                                        u32 *num_errors, u32 *num_warnings)
+{
+       u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */
+       u16 i, j;
+
+       *num_errors = 0;
+       *num_warnings = 0;
+
+       /* Go over dumped results */
+       for (rule_idx = 0; rule_idx < num_rules && dump_buf < dump_buf_end;
+            rule_idx++) {
+               const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data;
+               struct dbg_idle_chk_result_hdr *hdr;
+               const char *parsing_str;
+               u32 parsing_str_offset;
+               const char *lsi_msg;
+               u8 curr_reg_id = 0;
+               bool has_fw_msg;
+
+               hdr = (struct dbg_idle_chk_result_hdr *)dump_buf;
+               rule_parsing_data =
+                       (const struct dbg_idle_chk_rule_parsing_data *)
+                       &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].
+                       ptr[hdr->rule_id];
+               parsing_str_offset =
+                       GET_FIELD(rule_parsing_data->data,
+                                 DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET);
+               has_fw_msg =
+                       GET_FIELD(rule_parsing_data->data,
+                               DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0;
+               parsing_str = &((const char *)
+                               s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr)
+                               [parsing_str_offset];
+               lsi_msg = parsing_str;
+
+               if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES)
+                       return 0;
+
+               /* Skip rule header */
+               dump_buf += (sizeof(struct dbg_idle_chk_result_hdr) / 4);
+
+               /* Update errors/warnings count */
+               if (hdr->severity == IDLE_CHK_SEVERITY_ERROR ||
+                   hdr->severity == IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC)
+                       (*num_errors)++;
+               else
+                       (*num_warnings)++;
+
+               /* Print rule severity */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset), "%s: ",
+                           s_idle_chk_severity_str[hdr->severity]);
+
+               /* Print rule message */
+               if (has_fw_msg)
+                       parsing_str += strlen(parsing_str) + 1;
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset), "%s.",
+                           has_fw_msg &&
+                           print_fw_idle_chk ? parsing_str : lsi_msg);
+               parsing_str += strlen(parsing_str) + 1;
+
+               /* Print register values */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset), " Registers:");
+               for (i = 0;
+                    i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs;
+                    i++) {
+                       struct dbg_idle_chk_result_reg_hdr *reg_hdr
+                           = (struct dbg_idle_chk_result_reg_hdr *)
+                           dump_buf;
+                       bool is_mem =
+                               GET_FIELD(reg_hdr->data,
+                                         DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM);
+                       u8 reg_id =
+                               GET_FIELD(reg_hdr->data,
+                                         DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID);
+
+                       /* Skip reg header */
+                       dump_buf +=
+                           (sizeof(struct dbg_idle_chk_result_reg_hdr) / 4);
+
+                       /* Skip register names until the required reg_id is
+                        * reached.
+                        */
+                       for (; reg_id > curr_reg_id;
+                            curr_reg_id++,
+                            parsing_str += strlen(parsing_str) + 1);
+
+                       results_offset +=
+                           sprintf(qed_get_buf_ptr(results_buf,
+                                                   results_offset), " %s",
+                                   parsing_str);
+                       if (i < hdr->num_dumped_cond_regs && is_mem)
+                               results_offset +=
+                                   sprintf(qed_get_buf_ptr(results_buf,
+                                                           results_offset),
+                                           "[%d]", hdr->mem_entry_id +
+                                           reg_hdr->start_entry);
+                       results_offset +=
+                           sprintf(qed_get_buf_ptr(results_buf,
+                                                   results_offset), "=");
+                       for (j = 0; j < reg_hdr->size; j++, dump_buf++) {
+                               results_offset +=
+                                   sprintf(qed_get_buf_ptr(results_buf,
+                                                           results_offset),
+                                           "0x%x", *dump_buf);
+                               if (j < reg_hdr->size - 1)
+                                       results_offset +=
+                                           sprintf(qed_get_buf_ptr
+                                                   (results_buf,
+                                                    results_offset), ",");
+                       }
+               }
+
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+       }
+
+       /* Check if end of dump buffer was exceeded */
+       if (dump_buf > dump_buf_end)
+               return 0;
+       return results_offset;
+}
+
+/* Parses an idle check dump buffer.
+ * If results_buf is not NULL, the idle check results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
+                                              u32 *dump_buf,
+                                              u32 num_dumped_dwords,
+                                              char *results_buf,
+                                              u32 *parsed_results_bytes,
+                                              u32 *num_errors,
+                                              u32 *num_warnings)
+{
+       const char *section_name, *param_name, *param_str_val;
+       u32 *dump_buf_end = dump_buf + num_dumped_dwords;
+       u32 num_section_params = 0, num_rules;
+       u32 results_offset = 0; /* Offset in results_buf in bytes */
+
+       *parsed_results_bytes = 0;
+       *num_errors = 0;
+       *num_warnings = 0;
+       if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr ||
+           !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr)
+               return DBG_STATUS_DBG_ARRAY_NOT_SET;
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
+
+       /* Read idle_chk section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "idle_chk") || num_section_params != 1)
+               return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &num_rules);
+       if (strcmp(param_name, "num_rules") != 0)
+               return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+       if (num_rules) {
+               u32 rules_print_size;
+
+               /* Print FW output */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "FW_IDLE_CHECK:\n");
+               rules_print_size =
+                       qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+                                                     dump_buf_end, num_rules,
+                                                     true,
+                                                     results_buf ?
+                                                     results_buf +
+                                                     results_offset : NULL,
+                                                     num_errors, num_warnings);
+               results_offset += rules_print_size;
+               if (rules_print_size == 0)
+                       return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+               /* Print LSI output */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "\nLSI_IDLE_CHECK:\n");
+               rules_print_size =
+                       qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+                                                     dump_buf_end, num_rules,
+                                                     false,
+                                                     results_buf ?
+                                                     results_buf +
+                                                     results_offset : NULL,
+                                                     num_errors, num_warnings);
+               results_offset += rules_print_size;
+               if (rules_print_size == 0)
+                       return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+       }
+
+       /* Print errors/warnings count */
+       if (*num_errors) {
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "\nIdle Check failed!!! (with %d errors and %d warnings)\n",
+                           *num_errors, *num_warnings);
+       } else if (*num_warnings) {
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "\nIdle Check completed successfuly (with %d warnings)\n",
+                           *num_warnings);
+       } else {
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "\nIdle Check completed successfuly\n");
+       }
+
+       /* Add 1 for string NULL termination */
+       *parsed_results_bytes = results_offset + 1;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32 num_dumped_dwords,
+                                                 u32 *results_buf_size)
+{
+       u32 num_errors, num_warnings;
+
+       return qed_parse_idle_chk_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      NULL,
+                                      results_buf_size,
+                                      &num_errors, &num_warnings);
+}
+
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf,
+                                          u32 *num_errors, u32 *num_warnings)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_idle_chk_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      results_buf,
+                                      &parsed_buf_size,
+                                      num_errors, num_warnings);
+}
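+
+/* Editor's illustrative sketch (not part of the patch): the intended pairing
+ * of the two functions above - size the results buffer first, then parse
+ * into it. The wrapper name is hypothetical.
+ */
+static enum dbg_status qed_example_idle_chk_results(struct qed_hwfn *p_hwfn,
+                                                   u32 *dump_buf,
+                                                   u32 num_dumped_dwords,
+                                                   char **results_buf)
+{
+       u32 results_buf_size, num_errors, num_warnings;
+       enum dbg_status status;
+
+       status = qed_get_idle_chk_results_buf_size(p_hwfn, dump_buf,
+                                                  num_dumped_dwords,
+                                                  &results_buf_size);
+       if (status != DBG_STATUS_OK)
+               return status;
+
+       *results_buf = vzalloc(results_buf_size);
+       if (!*results_buf)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords,
+                                         *results_buf, &num_errors,
+                                         &num_warnings);
+}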
+
+/* Frees the specified MCP Trace meta data */
+static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
+                                   struct mcp_trace_meta *meta)
+{
+       u32 i;
+
+       /* Release modules */
+       if (meta->modules) {
+               for (i = 0; i < meta->modules_num; i++)
+                       kfree(meta->modules[i]);
+               kfree(meta->modules);
+       }
+
+       /* Release formats */
+       if (meta->formats) {
+               for (i = 0; i < meta->formats_num; i++)
+                       kfree(meta->formats[i].format_str);
+               kfree(meta->formats);
+       }
+}
+
+/* Allocates and fills MCP Trace meta data based on the specified meta data
+ * dump buffer.
+ * Returns debug status code.
+ */
+static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
+                                               const u32 *meta_buf,
+                                               struct mcp_trace_meta *meta)
+{
+       u8 *meta_buf_bytes = (u8 *)meta_buf;
+       u32 offset = 0, signature, i;
+
+       memset(meta, 0, sizeof(*meta));
+
+       /* Read first signature */
+       signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+       if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+               return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+       /* Read number of modules and allocate memory for all the modules
+        * pointers.
+        */
+       meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+       meta->modules = kzalloc(meta->modules_num * sizeof(char *), GFP_KERNEL);
+       if (!meta->modules)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       /* Allocate and read all module strings */
+       for (i = 0; i < meta->modules_num; i++) {
+               u8 module_len = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+
+               *(meta->modules + i) = kzalloc(module_len, GFP_KERNEL);
+               if (!(*(meta->modules + i))) {
+                       /* Modules 0..i-1 were allocated and must be freed */
+                       meta->modules_num = i;
+                       return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+               }
+
+               qed_read_str_from_buf(meta_buf_bytes, &offset, module_len,
+                                     *(meta->modules + i));
+               if (module_len > MCP_TRACE_MAX_MODULE_LEN)
+                       (*(meta->modules + i))[MCP_TRACE_MAX_MODULE_LEN] = '\0';
+       }
+
+       /* Read second signature */
+       signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+       if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+               return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+       /* Read number of formats and allocate memory for all formats */
+       meta->formats_num = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+       meta->formats = kzalloc(meta->formats_num *
+                               sizeof(struct mcp_trace_format),
+                               GFP_KERNEL);
+       if (!meta->formats)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       /* Allocate and read all strings */
+       for (i = 0; i < meta->formats_num; i++) {
+               struct mcp_trace_format *format_ptr = &meta->formats[i];
+               u8 format_len;
+
+               format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes,
+                                                          &offset);
+               format_len =
+                   (format_ptr->data &
+                    MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT;
+               format_ptr->format_str = kzalloc(format_len, GFP_KERNEL);
+               if (!format_ptr->format_str) {
+                       /* Formats 0..i-1 were allocated and must be freed */
+                       meta->formats_num = i;
+                       return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+               }
+
+               qed_read_str_from_buf(meta_buf_bytes,
+                                     &offset,
+                                     format_len, format_ptr->format_str);
+       }
+
+       return DBG_STATUS_OK;
+}
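+
+/* Editor's note - the meta image layout consumed above, as parsed by the
+ * function:
+ *
+ *   u32 signature          (MCP_TRACE_META_IMAGE_SIGNATURE)
+ *   u8  modules_num
+ *   per module:  u8 len, then a len-byte NUL-terminated module name
+ *   u32 signature          (repeated)
+ *   u32 formats_num
+ *   per format:  u32 data (the packed mcp_trace_format fields), then a
+ *                format string whose length is the LEN field of data
+ */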
+
+/* Parses an MCP Trace dump buffer.
+ * If results_buf is not NULL, the MCP Trace results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+                                               u32 *dump_buf,
+                                               u32 num_dumped_dwords,
+                                               char *results_buf,
+                                               u32 *parsed_results_bytes)
+{
+       u32 results_offset = 0, param_mask, param_shift, param_num_val;
+       u32 num_section_params, offset, end_offset, bytes_left;
+       const char *section_name, *param_name, *param_str_val;
+       u32 trace_data_dwords, trace_meta_dwords;
+       struct mcp_trace_meta meta;
+       struct mcp_trace *trace;
+       enum dbg_status status;
+       const u32 *meta_buf;
+       u8 *trace_buf;
+
+       *parsed_results_bytes = 0;
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
+
+       /* Read trace_data section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "mcp_trace_data") || num_section_params != 1)
+               return DBG_STATUS_MCP_TRACE_BAD_DATA;
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &param_num_val);
+       if (strcmp(param_name, "size"))
+               return DBG_STATUS_MCP_TRACE_BAD_DATA;
+       trace_data_dwords = param_num_val;
+
+       /* Prepare trace info */
+       trace = (struct mcp_trace *)dump_buf;
+       trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace);
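+
+       /* The trace data is a cyclic buffer; parse from the oldest recorded
+        * entry up to the producer offset.
+        */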
+       offset = trace->trace_oldest;
+       end_offset = trace->trace_prod;
+       bytes_left = qed_cyclic_sub(end_offset, offset, trace->size);
+       dump_buf += trace_data_dwords;
+
+       /* Read meta_data section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "mcp_trace_meta"))
+               return DBG_STATUS_MCP_TRACE_BAD_DATA;
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &param_num_val);
+       if (strcmp(param_name, "size") != 0)
+               return DBG_STATUS_MCP_TRACE_BAD_DATA;
+       trace_meta_dwords = param_num_val;
+
+       /* Choose meta data buffer */
+       if (!trace_meta_dwords) {
+               /* Dump doesn't include meta data */
+               if (!s_mcp_trace_meta.ptr)
+                       return DBG_STATUS_MCP_TRACE_NO_META;
+               meta_buf = s_mcp_trace_meta.ptr;
+       } else {
+               /* Dump includes meta data */
+               meta_buf = dump_buf;
+       }
+
+       /* Allocate meta data memory */
+       status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta);
+       if (status != DBG_STATUS_OK)
+               goto free_mem;
+
+       /* Ignore the level and modules masks - just print everything that is
+        * already in the buffer.
+        */
+       while (bytes_left) {
+               struct mcp_trace_format *format_ptr;
+               u8 format_level, format_module;
+               u32 params[3] = { 0, 0, 0 };
+               u32 header, format_idx, i;
+
+               if (bytes_left < MFW_TRACE_ENTRY_SIZE) {
+                       status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+                       goto free_mem;
+               }
+
+               header = qed_read_from_cyclic_buf(trace_buf,
+                                                 &offset,
+                                                 trace->size,
+                                                 MFW_TRACE_ENTRY_SIZE);
+               bytes_left -= MFW_TRACE_ENTRY_SIZE;
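+
+               /* The event ID in the entry header is an index into the
+                * format table read from the meta data.
+                */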
+               format_idx = header & MFW_TRACE_EVENTID_MASK;
+
+               /* Skip message if its index doesn't exist in the meta data */
+               if (format_idx >= meta.formats_num) {
+                       u8 format_size =
+                           (u8)((header &
+                                 MFW_TRACE_PRM_SIZE_MASK) >>
+                                MFW_TRACE_PRM_SIZE_SHIFT);
+
+                       if (bytes_left < format_size) {
+                               status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+                               goto free_mem;
+                       }
+
+                       offset = qed_cyclic_add(offset,
+                                               format_size, trace->size);
+                       bytes_left -= format_size;
+                       continue;
+               }
+
+               format_ptr = &meta.formats[format_idx];
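+
+               /* Read up to 3 trailing parameters. The size of each is
+                * encoded in a 2-bit field of the format descriptor.
+                */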
+               for (i = 0,
+                    param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift =
+                    MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
+                    i < MCP_TRACE_FORMAT_MAX_PARAMS;
+                    i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH,
+                    param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) {
+                       /* Extract param size (0..3) */
+                       u8 param_size =
+                           (u8)((format_ptr->data &
+                                 param_mask) >> param_shift);
+
+                       /* If the param size is zero, there are no other
+                        * parameters.
+                        */
+                       if (!param_size)
+                               break;
+
+                       /* Size is encoded using 2 bits, where 3 is used to
+                        * encode 4.
+                        */
+                       if (param_size == 3)
+                               param_size = 4;
+                       if (bytes_left < param_size) {
+                               status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+                               goto free_mem;
+                       }
+
+                       params[i] = qed_read_from_cyclic_buf(trace_buf,
+                                                            &offset,
+                                                            trace->size,
+                                                            param_size);
+                       bytes_left -= param_size;
+               }
+
+               format_level =
+                   (u8)((format_ptr->data &
+                         MCP_TRACE_FORMAT_LEVEL_MASK) >>
+                         MCP_TRACE_FORMAT_LEVEL_SHIFT);
+               format_module =
+                   (u8)((format_ptr->data &
+                         MCP_TRACE_FORMAT_MODULE_MASK) >>
+                        MCP_TRACE_FORMAT_MODULE_SHIFT);
+               if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) {
+                       status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+                       goto free_mem;
+               }
+
+               /* Print current message to results buffer */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset), "%s %-8s: ",
+                           s_mcp_trace_level_str[format_level],
+                           meta.modules[format_module]);
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           format_ptr->format_str, params[0], params[1],
+                           params[2]);
+       }
+
+free_mem:
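+       /* Add 1 for string NULL termination */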
+       *parsed_results_bytes = results_offset + 1;
+       qed_mcp_trace_free_meta(p_hwfn, &meta);
+       return status;
+}
+
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                  u32 *dump_buf,
+                                                  u32 num_dumped_dwords,
+                                                  u32 *results_buf_size)
+{
+       return qed_parse_mcp_trace_dump(p_hwfn,
+                                       dump_buf,
+                                       num_dumped_dwords,
+                                       NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+                                           u32 *dump_buf,
+                                           u32 num_dumped_dwords,
+                                           char *results_buf)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_mcp_trace_dump(p_hwfn,
+                                       dump_buf,
+                                       num_dumped_dwords,
+                                       results_buf, &parsed_buf_size);
+}
+
+/* Parses a Reg FIFO dump buffer.
+ * If results_buf is not NULL, the Reg FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+                                              u32 *dump_buf,
+                                              u32 num_dumped_dwords,
+                                              char *results_buf,
+                                              u32 *parsed_results_bytes)
+{
+       u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+       const char *section_name, *param_name, *param_str_val;
+       struct reg_fifo_element *elements;
+       u32 i;
+       u8 j, err_val, vf_val;
+       char vf_str[4];
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_REG_FIFO_BAD_DATA;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
+
+       /* Read reg_fifo_data section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "reg_fifo_data"))
+               return DBG_STATUS_REG_FIFO_BAD_DATA;
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &param_num_val);
+       if (strcmp(param_name, "size"))
+               return DBG_STATUS_REG_FIFO_BAD_DATA;
+       if (param_num_val % REG_FIFO_ELEMENT_DWORDS)
+               return DBG_STATUS_REG_FIFO_BAD_DATA;
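+
+       /* The section data that follows the "size" param is the raw array of
+        * FIFO elements, decoded in place below.
+        */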
+       num_elements = param_num_val / REG_FIFO_ELEMENT_DWORDS;
+       elements = (struct reg_fifo_element *)dump_buf;
+
+       /* Decode elements */
+       for (i = 0; i < num_elements; i++) {
+               bool err_printed = false;
+
+               /* Discover if element belongs to a VF or a PF */
+               vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF);
+               if (vf_val == REG_FIFO_ELEMENT_IS_PF_VF_VAL)
+                       sprintf(vf_str, "%s", "N/A");
+               else
+                       sprintf(vf_str, "%d", vf_val);
+
+               /* Add parsed element to parsed buffer */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf, results_offset),
+                           "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
+                           elements[i].data,
+                           GET_FIELD(elements[i].data,
+                                     REG_FIFO_ELEMENT_ADDRESS) *
+                           REG_FIFO_ELEMENT_ADDR_FACTOR,
+                           s_access_strs[GET_FIELD(elements[i].data,
+                                                   REG_FIFO_ELEMENT_ACCESS)],
+                           GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_PF),
+                           vf_str,
+                           GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_PORT),
+                           s_privilege_strs[GET_FIELD(elements[i].data,
+                                               REG_FIFO_ELEMENT_PRIVILEGE)],
+                           s_protection_strs[GET_FIELD(elements[i].data,
+                                               REG_FIFO_ELEMENT_PROTECTION)],
+                           s_master_strs[GET_FIELD(elements[i].data,
+                                               REG_FIFO_ELEMENT_MASTER)]);
+
+               /* Print errors */
+               for (j = 0,
+                    err_val = GET_FIELD(elements[i].data,
+                                        REG_FIFO_ELEMENT_ERROR);
+                    j < ARRAY_SIZE(s_reg_fifo_error_strs);
+                    j++, err_val >>= 1) {
+                       if (!(err_val & 0x1))
+                               continue;
+                       if (err_printed)
+                               results_offset +=
+                                       sprintf(qed_get_buf_ptr(results_buf,
+                                                               results_offset),
+                                               ", ");
+                       results_offset +=
+                               sprintf(qed_get_buf_ptr(results_buf,
+                                                       results_offset), "%s",
+                                       s_reg_fifo_error_strs[j]);
+                       err_printed = true;
+               }
+
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+       }
+
+       results_offset += sprintf(qed_get_buf_ptr(results_buf,
+                                                 results_offset),
+                                 "fifo contained %d elements", num_elements);
+
+       /* Add 1 for string NULL termination */
+       *parsed_results_bytes = results_offset + 1;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32 num_dumped_dwords,
+                                                 u32 *results_buf_size)
+{
+       return qed_parse_reg_fifo_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_reg_fifo_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      results_buf, &parsed_buf_size);
+}
+
+/* Parses an IGU FIFO dump buffer.
+ * If results_buf is not NULL, the IGU FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+                                              u32 *dump_buf,
+                                              u32 num_dumped_dwords,
+                                              char *results_buf,
+                                              u32 *parsed_results_bytes)
+{
+       u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+       const char *section_name, *param_name, *param_str_val;
+       struct igu_fifo_element *elements;
+       char parsed_addr_data[32];
+       char parsed_wr_data[256];
+       u32 i;
+       u8 j;
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
+
+       /* Read igu_fifo_data section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "igu_fifo_data"))
+               return DBG_STATUS_IGU_FIFO_BAD_DATA;
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &param_num_val);
+       if (strcmp(param_name, "size"))
+               return DBG_STATUS_IGU_FIFO_BAD_DATA;
+       if (param_num_val % IGU_FIFO_ELEMENT_DWORDS)
+               return DBG_STATUS_IGU_FIFO_BAD_DATA;
+       num_elements = param_num_val / IGU_FIFO_ELEMENT_DWORDS;
+       elements = (struct igu_fifo_element *)dump_buf;
+
+       /* Decode elements */
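+       /* Each element spans 3 dwords: dword0 holds the command attributes,
+        * while dwords 1-2 hold the 64-bit command data.
+        */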
+       for (i = 0; i < num_elements; i++) {
+               /* dword12 (dword index 1 and 2) contains bits 32..95 of the
+                * FIFO element.
+                */
+               u64 dword12 =
+                   ((u64)elements[i].dword2 << 32) | elements[i].dword1;
+               bool is_wr_cmd = GET_FIELD(dword12,
+                                          IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD);
+               bool is_pf = GET_FIELD(elements[i].dword0,
+                                      IGU_FIFO_ELEMENT_DWORD0_IS_PF);
+               u16 cmd_addr = GET_FIELD(elements[i].dword0,
+                                        IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR);
+               u8 source = GET_FIELD(elements[i].dword0,
+                                     IGU_FIFO_ELEMENT_DWORD0_SOURCE);
+               u8 err_type = GET_FIELD(elements[i].dword0,
+                                       IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE);
+               const struct igu_fifo_addr_data *addr_data = NULL;
+
+               if (source >= ARRAY_SIZE(s_igu_fifo_source_strs))
+                       return DBG_STATUS_IGU_FIFO_BAD_DATA;
+               if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs))
+                       return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+               /* Find address data */
+               for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data;
+                    j++)
+                       if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr &&
+                           cmd_addr <= s_igu_fifo_addr_data[j].end_addr)
+                               addr_data = &s_igu_fifo_addr_data[j];
+               if (!addr_data)
+                       return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+               /* Prepare parsed address data */
+               switch (addr_data->type) {
+               case IGU_ADDR_TYPE_MSIX_MEM:
+                       sprintf(parsed_addr_data,
+                               " vector_num=0x%x", cmd_addr / 2);
+                       break;
+               case IGU_ADDR_TYPE_WRITE_INT_ACK:
+               case IGU_ADDR_TYPE_WRITE_PROD_UPDATE:
+                       sprintf(parsed_addr_data,
+                               " SB=0x%x", cmd_addr - addr_data->start_addr);
+                       break;
+               default:
+                       parsed_addr_data[0] = '\0';
+               }
+
+               /* Prepare parsed write data */
+               if (is_wr_cmd) {
+                       u32 wr_data = GET_FIELD(dword12,
+                                       IGU_FIFO_ELEMENT_DWORD12_WR_DATA);
+                       u32 prod_cons = GET_FIELD(wr_data,
+                                                 IGU_FIFO_WR_DATA_PROD_CONS);
+                       u8 is_cleanup = GET_FIELD(wr_data,
+                                                 IGU_FIFO_WR_DATA_CMD_TYPE);
+
+                       if (source == IGU_SRC_ATTN) {
+                               sprintf(parsed_wr_data,
+                                       "prod: 0x%x, ", prod_cons);
+                       } else {
+                               if (is_cleanup) {
+                                       u8 cleanup_val = GET_FIELD(wr_data,
+                                                                  IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL);
+                                       u8 cleanup_type = GET_FIELD(wr_data,
+                                                                   IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE);
+
+                                       sprintf(parsed_wr_data,
+                                               "cmd_type: cleanup, cleanup_val: %s, cleanup_type: %d, ",
+                                               cleanup_val ? "set" : "clear",
+                                               cleanup_type);
+                               } else {
+                                       u8 update_flag = GET_FIELD(wr_data,
+                                                                  IGU_FIFO_WR_DATA_UPDATE_FLAG);
+                                       u8 en_dis_int_for_sb =
+                                           GET_FIELD(wr_data,
+                                                     IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB);
+                                       u8 segment = GET_FIELD(wr_data,
+                                                              IGU_FIFO_WR_DATA_SEGMENT);
+                                       u8 timer_mask = GET_FIELD(wr_data,
+                                                                 IGU_FIFO_WR_DATA_TIMER_MASK);
+
+                                       sprintf(parsed_wr_data,
+                                               "cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ",
+                                               prod_cons,
+                                               update_flag ? "update" : "nop",
+                                               en_dis_int_for_sb
+                                               ? (en_dis_int_for_sb ==
+                                                  1 ? "disable" : "nop") :
+                                               "enable",
+                                               segment ? "attn" : "regular",
+                                               timer_mask);
+                               }
+                       }
+               } else {
+                       parsed_wr_data[0] = '\0';
+               }
+
+               /* Add parsed element to parsed buffer */
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n",
+                           elements[i].dword2, elements[i].dword1,
+                           elements[i].dword0,
+                           is_pf ? "pf" : "vf",
+                           GET_FIELD(elements[i].dword0,
+                                     IGU_FIFO_ELEMENT_DWORD0_FID),
+                           s_igu_fifo_source_strs[source],
+                           is_wr_cmd ? "wr" : "rd", cmd_addr,
+                           (!is_pf && addr_data->vf_desc)
+                           ? addr_data->vf_desc : addr_data->desc,
+                           parsed_addr_data, parsed_wr_data,
+                           s_igu_fifo_error_strs[err_type]);
+       }
+
+       results_offset += sprintf(qed_get_buf_ptr(results_buf,
+                                                 results_offset),
+                                 "fifo contained %d elements", num_elements);
+
+       /* Add 1 for string NULL termination */
+       *parsed_results_bytes = results_offset + 1;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32 num_dumped_dwords,
+                                                 u32 *results_buf_size)
+{
+       return qed_parse_igu_fifo_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_igu_fifo_dump(p_hwfn,
+                                      dump_buf,
+                                      num_dumped_dwords,
+                                      results_buf, &parsed_buf_size);
+}
+
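+/* Parses a Protection Override dump buffer.
+ * If results_buf is not NULL, the Protection Override results are printed
+ * to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */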
+static enum dbg_status
+qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
+                                  u32 *dump_buf,
+                                  u32 num_dumped_dwords,
+                                  char *results_buf,
+                                  u32 *parsed_results_bytes)
+{
+       u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+       const char *section_name, *param_name, *param_str_val;
+       struct protection_override_element *elements;
+       u32 i;
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
+
+       /* Read protection_override_data section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "protection_override_data"))
+               return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+       dump_buf += qed_read_param(dump_buf,
+                                  &param_name, &param_str_val, &param_num_val);
+       if (strcmp(param_name, "size"))
+               return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+       if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0)
+               return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+       num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+       elements = (struct protection_override_element *)dump_buf;
+
+       /* Decode elements */
+       for (i = 0; i < num_elements; i++) {
+               u32 address = GET_FIELD(elements[i].data,
+                                       PROTECTION_OVERRIDE_ELEMENT_ADDRESS) *
+                                       PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR;
+
+               results_offset +=
+                   sprintf(qed_get_buf_ptr(results_buf,
+                                           results_offset),
+                           "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n",
+                           i, address,
+                           GET_FIELD(elements[i].data,
+                                     PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE),
+                           GET_FIELD(elements[i].data,
+                                     PROTECTION_OVERRIDE_ELEMENT_READ),
+                           GET_FIELD(elements[i].data,
+                                     PROTECTION_OVERRIDE_ELEMENT_WRITE),
+                           s_protection_strs[GET_FIELD(elements[i].data,
+                               PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)],
+                           s_protection_strs[GET_FIELD(elements[i].data,
+                               PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION)]);
+       }
+
+       results_offset += sprintf(qed_get_buf_ptr(results_buf,
+                                                 results_offset),
+                                 "protection override contained %d elements",
+                                 num_elements);
+
+       /* Add 1 for string NULL termination */
+       *parsed_results_bytes = results_offset + 1;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status
+qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+                                            u32 *dump_buf,
+                                            u32 num_dumped_dwords,
+                                            u32 *results_buf_size)
+{
+       return qed_parse_protection_override_dump(p_hwfn,
+                                                 dump_buf,
+                                                 num_dumped_dwords,
+                                                 NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+                                                     u32 *dump_buf,
+                                                     u32 num_dumped_dwords,
+                                                     char *results_buf)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_protection_override_dump(p_hwfn,
+                                                 dump_buf,
+                                                 num_dumped_dwords,
+                                                 results_buf,
+                                                 &parsed_buf_size);
+}
+
+/* Parses a FW Asserts dump buffer.
+ * If results_buf is not NULL, the FW Asserts results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+                                                u32 *dump_buf,
+                                                u32 num_dumped_dwords,
+                                                char *results_buf,
+                                                u32 *parsed_results_bytes)
+{
+       u32 results_offset = 0, num_section_params, param_num_val, i;
+       const char *param_name, *param_str_val, *section_name;
+       bool last_section_found = false;
+
+       *parsed_results_bytes = 0;
+
+       /* Read global_params section */
+       dump_buf += qed_read_section_hdr(dump_buf,
+                                        &section_name, &num_section_params);
+       if (strcmp(section_name, "global_params"))
+               return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+
+       /* Print global params */
+       dump_buf += qed_print_section_params(dump_buf,
+                                            num_section_params,
+                                            results_buf, &results_offset);
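+
+       /* Iterate over the per-storm fw_asserts sections until the "last"
+        * section marker is reached.
+        */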
+       while (!last_section_found) {
+               const char *storm_letter = NULL;
+               u32 storm_dump_size = 0;
+
+               dump_buf += qed_read_section_hdr(dump_buf,
+                                                &section_name,
+                                                &num_section_params);
+               if (!strcmp(section_name, "last")) {
+                       last_section_found = true;
+                       continue;
+               } else if (strcmp(section_name, "fw_asserts")) {
+                       return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+               }
+
+               /* Extract params */
+               for (i = 0; i < num_section_params; i++) {
+                       dump_buf += qed_read_param(dump_buf,
+                                                  &param_name,
+                                                  &param_str_val,
+                                                  &param_num_val);
+                       if (!strcmp(param_name, "storm"))
+                               storm_letter = param_str_val;
+                       else if (!strcmp(param_name, "size"))
+                               storm_dump_size = param_num_val;
+                       else
+                               return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+               }
+
+               if (!storm_letter || !storm_dump_size)
+                       return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+
+               /* Print data */
+               results_offset += sprintf(qed_get_buf_ptr(results_buf,
+                                                         results_offset),
+                                         "\n%sSTORM_ASSERT: size=%d\n",
+                                         storm_letter, storm_dump_size);
+               for (i = 0; i < storm_dump_size; i++, dump_buf++)
+                       results_offset +=
+                           sprintf(qed_get_buf_ptr(results_buf,
+                                                   results_offset),
+                                   "%08x\n", *dump_buf);
+       }
+
+       /* Add 1 for string NULL termination */
+       *parsed_results_bytes = results_offset + 1;
+       return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                   u32 *dump_buf,
+                                                   u32 num_dumped_dwords,
+                                                   u32 *results_buf_size)
+{
+       return qed_parse_fw_asserts_dump(p_hwfn,
+                                        dump_buf,
+                                        num_dumped_dwords,
+                                        NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+                                            u32 *dump_buf,
+                                            u32 num_dumped_dwords,
+                                            char *results_buf)
+{
+       u32 parsed_buf_size;
+
+       return qed_parse_fw_asserts_dump(p_hwfn,
+                                        dump_buf,
+                                        num_dumped_dwords,
+                                        results_buf, &parsed_buf_size);
+}
+
+/* Wrapper for unifying the idle_chk and mcp_trace APIs */
+enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn,
+                                                  u32 *dump_buf,
+                                                  u32 num_dumped_dwords,
+                                                  char *results_buf)
+{
+       u32 num_errors, num_warnings;
+
+       return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords,
+                                         results_buf, &num_errors,
+                                         &num_warnings);
+}
+
+/* Feature meta data lookup table */
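+/* The entries must be kept in the same order as enum qed_dbg_features, since
+ * the table is indexed by the feature index.
+ */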
+static struct {
+       char *name;
+       enum dbg_status (*get_size)(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt, u32 *size);
+       enum dbg_status (*perform_dump)(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt, u32 *dump_buf,
+                                       u32 buf_size, u32 *dumped_dwords);
+       enum dbg_status (*print_results)(struct qed_hwfn *p_hwfn,
+                                        u32 *dump_buf, u32 num_dumped_dwords,
+                                        char *results_buf);
+       enum dbg_status (*results_buf_size)(struct qed_hwfn *p_hwfn,
+                                           u32 *dump_buf,
+                                           u32 num_dumped_dwords,
+                                           u32 *results_buf_size);
+} qed_features_lookup[] = {
+       {"grc",
+        qed_dbg_grc_get_dump_buf_size, qed_dbg_grc_dump, NULL, NULL},
+       {"idle_chk",
+        qed_dbg_idle_chk_get_dump_buf_size, qed_dbg_idle_chk_dump,
+        qed_print_idle_chk_results_wrapper,
+        qed_get_idle_chk_results_buf_size},
+       {"mcp_trace",
+        qed_dbg_mcp_trace_get_dump_buf_size, qed_dbg_mcp_trace_dump,
+        qed_print_mcp_trace_results, qed_get_mcp_trace_results_buf_size},
+       {"reg_fifo",
+        qed_dbg_reg_fifo_get_dump_buf_size, qed_dbg_reg_fifo_dump,
+        qed_print_reg_fifo_results, qed_get_reg_fifo_results_buf_size},
+       {"igu_fifo",
+        qed_dbg_igu_fifo_get_dump_buf_size, qed_dbg_igu_fifo_dump,
+        qed_print_igu_fifo_results, qed_get_igu_fifo_results_buf_size},
+       {"protection_override",
+        qed_dbg_protection_override_get_dump_buf_size,
+        qed_dbg_protection_override_dump,
+        qed_print_protection_override_results,
+        qed_get_protection_override_results_buf_size},
+       {"fw_asserts",
+        qed_dbg_fw_asserts_get_dump_buf_size, qed_dbg_fw_asserts_dump,
+        qed_print_fw_asserts_results, qed_get_fw_asserts_results_buf_size},
+};
+
+static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size)
+{
+       u32 i, precision = 80;
+
+       if (!p_text_buf)
+               return;
+
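+       /* Print in chunks of up to 80 characters, since printk output is
+        * limited in line length.
+        */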
+       pr_notice("\n%.*s", precision, p_text_buf);
+       for (i = precision; i < text_size; i += precision)
+               pr_cont("%.*s", precision, p_text_buf + i);
+       pr_cont("\n");
+}
+
+#define QED_RESULTS_BUF_MIN_SIZE 16
+/* Generic function for decoding debug feature info */
+enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
+                              enum qed_dbg_features feature_idx)
+{
+       struct qed_dbg_feature *feature =
+           &p_hwfn->cdev->dbg_params.features[feature_idx];
+       u32 text_size_bytes, null_char_pos, i;
+       enum dbg_status rc;
+       char *text_buf;
+
+       /* Check if feature supports formatting capability */
+       if (!qed_features_lookup[feature_idx].results_buf_size)
+               return DBG_STATUS_OK;
+
+       /* Obtain size of formatted output */
+       rc = qed_features_lookup[feature_idx].
+               results_buf_size(p_hwfn, (u32 *)feature->dump_buf,
+                                feature->dumped_dwords, &text_size_bytes);
+       if (rc != DBG_STATUS_OK)
+               return rc;
+
+       /* Make sure that the allocated size is a multiple of dword (4 bytes) */
+       null_char_pos = text_size_bytes - 1;
+       text_size_bytes = (text_size_bytes + 3) & ~0x3;
+
+       if (text_size_bytes < QED_RESULTS_BUF_MIN_SIZE) {
+               DP_NOTICE(p_hwfn->cdev,
+                         "formatted size of feature was too small %d. Aborting\n",
+                         text_size_bytes);
+               return DBG_STATUS_INVALID_ARGS;
+       }
+
+       /* Allocate temp text buf */
+       text_buf = vzalloc(text_size_bytes);
+       if (!text_buf)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       /* Decode feature opcodes to string on temp buf */
+       rc = qed_features_lookup[feature_idx].
+               print_results(p_hwfn, (u32 *)feature->dump_buf,
+                             feature->dumped_dwords, text_buf);
+       if (rc != DBG_STATUS_OK) {
+               vfree(text_buf);
+               return rc;
+       }
+
+       /* Replace the original null character with a '\n' character.
+        * The bytes that were added as a result of the dword alignment are also
+        * padded with '\n' characters.
+        */
+       for (i = null_char_pos; i < text_size_bytes; i++)
+               text_buf[i] = '\n';
+
+       /* Dump printable feature to log */
+       if (p_hwfn->cdev->dbg_params.print_data)
+               qed_dbg_print_feature(text_buf, text_size_bytes);
+
+       /* Free the old dump_buf and point the dump_buf to the newly allocated
+        * and formatted text buffer.
+        */
+       vfree(feature->dump_buf);
+       feature->dump_buf = text_buf;
+       feature->buf_size = text_size_bytes;
+       feature->dumped_dwords = text_size_bytes / 4;
+       return rc;
+}
+
+/* Generic function for performing the dump of a debug feature. */
+enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                            enum qed_dbg_features feature_idx)
+{
+       struct qed_dbg_feature *feature =
+           &p_hwfn->cdev->dbg_params.features[feature_idx];
+       u32 buf_size_dwords;
+       enum dbg_status rc;
+
+       DP_NOTICE(p_hwfn->cdev, "Collecting a debug feature [\"%s\"]\n",
+                 qed_features_lookup[feature_idx].name);
+
+       /* If dump_buf was already allocated, free it (this can happen if a
+        * dump was requested but the file was never read).
+        * The buffer can't be reused as is, since the required size may have
+        * changed.
+        */
+       if (feature->dump_buf) {
+               vfree(feature->dump_buf);
+               feature->dump_buf = NULL;
+       }
+
+       /* Get buffer size from hsi, allocate accordingly, and perform the
+        * dump.
+        */
+       rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt,
+                                                      &buf_size_dwords);
+       if (rc != DBG_STATUS_OK)
+               return rc;
+       feature->buf_size = buf_size_dwords * sizeof(u32);
+       feature->dump_buf = vmalloc(feature->buf_size);
+       if (!feature->dump_buf)
+               return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+       rc = qed_features_lookup[feature_idx].
+               perform_dump(p_hwfn, p_ptt, (u32 *)feature->dump_buf,
+                            feature->buf_size / sizeof(u32),
+                            &feature->dumped_dwords);
+
+       /* If the MCP is stuck, we get a DBG_STATUS_NVRAM_GET_IMAGE_FAILED
+        * error. In this case the buffer holds valid binary data, but we won't
+        * be able to parse it (since parsing relies on data in NVRAM which is
+        * only accessible when the MFW is responsive). Skip the formatting but
+        * return success so that the binary data is provided.
+        */
+       if (rc == DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
+               return DBG_STATUS_OK;
+
+       if (rc != DBG_STATUS_OK)
+               return rc;
+
+       /* Format output */
+       rc = format_feature(p_hwfn, feature_idx);
+       return rc;
+}
+
+int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_GRC, num_dumped_bytes);
+}
+
+int qed_dbg_grc_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_GRC);
+}
+
+int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IDLE_CHK,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_idle_chk_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_IDLE_CHK);
+}
+
+int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_REG_FIFO,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_reg_fifo_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_REG_FIFO);
+}
+
+int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IGU_FIFO,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_igu_fifo_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO);
+}
+
+int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer,
+                               u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_PROTECTION_OVERRIDE,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_protection_override_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_PROTECTION_OVERRIDE);
+}
+
+int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer,
+                      u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_FW_ASSERTS,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_fw_asserts_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS);
+}
+
+int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer,
+                     u32 *num_dumped_bytes)
+{
+       return qed_dbg_feature(cdev, buffer, DBG_FEATURE_MCP_TRACE,
+                              num_dumped_bytes);
+}
+
+int qed_dbg_mcp_trace_size(struct qed_dev *cdev)
+{
+       return qed_dbg_feature_size(cdev, DBG_FEATURE_MCP_TRACE);
+}
+
+/* Defines the number of bytes allocated for recording the length of a
+ * debugfs feature buffer.
+ */
+#define REGDUMP_HEADER_SIZE                    sizeof(u32)
+#define REGDUMP_HEADER_FEATURE_SHIFT           24
+#define REGDUMP_HEADER_ENGINE_SHIFT            31
+#define REGDUMP_HEADER_OMIT_ENGINE_SHIFT       30
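+/* The header dword holds the feature size (in bytes) in bits 0-23, the
+ * feature type starting at bit 24, the omit-engine flag in bit 30 and the
+ * engine number in bit 31.
+ */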
+enum debug_print_features {
+       OLD_MODE = 0,
+       IDLE_CHK = 1,
+       GRC_DUMP = 2,
+       MCP_TRACE = 3,
+       REG_FIFO = 4,
+       PROTECTION_OVERRIDE = 5,
+       IGU_FIFO = 6,
+       PHY = 7,
+       FW_ASSERTS = 8,
+};
+
+static u32 qed_calc_regdump_header(enum debug_print_features feature,
+                                  int engine, u32 feature_size, u8 omit_engine)
+{
+       /* Insert the engine, feature and mode inside the header and combine it
+        * with feature size.
+        */
+       return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) |
+              (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) |
+              (engine << REGDUMP_HEADER_ENGINE_SHIFT);
+}
+
+int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
+{
+       u8 cur_engine, omit_engine = 0, org_engine;
+       u32 offset = 0, feature_size;
+       int rc;
+
+       if (cdev->num_hwfns == 1)
+               omit_engine = 1;
+
+       org_engine = qed_get_debug_engine(cdev);
+       for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) {
+               /* Collect idle_chks and grcDump for each hw function */
+               DP_VERBOSE(cdev, QED_MSG_DEBUG,
+                          "obtaining idle_chk and grcdump for current engine\n");
+               qed_set_debug_engine(cdev, cur_engine);
+
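+               /* idle_chk is dumped twice; presumably so that readings that
+                * differ between the two passes can be told apart from
+                * persistent idle-check failures.
+                */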
+               /* First idle_chk */
+               rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset +
+                                     REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(IDLE_CHK, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc);
+               }
+
+               /* Second idle_chk */
+               rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset +
+                                     REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(IDLE_CHK, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc);
+               }
+
+               /* reg_fifo dump */
+               rc = qed_dbg_reg_fifo(cdev, (u8 *)buffer + offset +
+                                     REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(REG_FIFO, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_reg_fifo failed. rc = %d\n", rc);
+               }
+
+               /* igu_fifo dump */
+               rc = qed_dbg_igu_fifo(cdev, (u8 *)buffer + offset +
+                                     REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(IGU_FIFO, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_igu_fifo failed. rc = %d", rc);
+               }
+
+               /* protection_override dump */
+               rc = qed_dbg_protection_override(cdev, (u8 *)buffer + offset +
+                                                REGDUMP_HEADER_SIZE,
+                                                &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(PROTECTION_OVERRIDE,
+                                                   cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev,
+                              "qed_dbg_protection_override failed. rc = %d\n",
+                              rc);
+               }
+
+               /* fw_asserts dump */
+               rc = qed_dbg_fw_asserts(cdev, (u8 *)buffer + offset +
+                                       REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(FW_ASSERTS, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_fw_asserts failed. rc = %d\n",
+                              rc);
+               }
+
+               /* GRC dump - must be last because when mcp stuck it will
+                * clutter idle_chk, reg_fifo, ...
+                */
+               rc = qed_dbg_grc(cdev, (u8 *)buffer + offset +
+                                REGDUMP_HEADER_SIZE, &feature_size);
+               if (!rc) {
+                       *(u32 *)((u8 *)buffer + offset) =
+                           qed_calc_regdump_header(GRC_DUMP, cur_engine,
+                                                   feature_size, omit_engine);
+                       offset += (feature_size + REGDUMP_HEADER_SIZE);
+               } else {
+                       DP_ERR(cdev, "qed_dbg_grc failed. rc = %d", rc);
+               }
+       }
+
+       /* mcp_trace */
+       rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset +
+                              REGDUMP_HEADER_SIZE, &feature_size);
+       if (!rc) {
+               *(u32 *)((u8 *)buffer + offset) =
+                   qed_calc_regdump_header(MCP_TRACE, cur_engine,
+                                           feature_size, omit_engine);
+               offset += (feature_size + REGDUMP_HEADER_SIZE);
+       } else {
+               DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc);
+       }
+
+       qed_set_debug_engine(cdev, org_engine);
+
+       return 0;
+}
+
+int qed_dbg_all_data_size(struct qed_dev *cdev)
+{
+       u8 cur_engine, org_engine;
+       u32 regs_len = 0;
+
+       org_engine = qed_get_debug_engine(cdev);
+       for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) {
+               /* Engine specific */
+               DP_VERBOSE(cdev, QED_MSG_DEBUG,
+                          "calculating idle_chk and grcdump register length for current engine\n");
+               qed_set_debug_engine(cdev, cur_engine);
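+
+               /* Two idle_chk buffers are reserved, matching the two
+                * idle_chk dumps taken per engine in qed_dbg_all_data().
+                */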
+               regs_len += REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) +
+                           REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) +
+                           REGDUMP_HEADER_SIZE + qed_dbg_grc_size(cdev) +
+                           REGDUMP_HEADER_SIZE + qed_dbg_reg_fifo_size(cdev) +
+                           REGDUMP_HEADER_SIZE + qed_dbg_igu_fifo_size(cdev) +
+                           REGDUMP_HEADER_SIZE +
+                           qed_dbg_protection_override_size(cdev) +
+                           REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev);
+       }
+
+       /* Engine common */
+       regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev);
+       qed_set_debug_engine(cdev, org_engine);
+
+       return regs_len;
+}
+
+int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
+                   enum qed_dbg_features feature, u32 *num_dumped_bytes)
+{
+       struct qed_hwfn *p_hwfn =
+               &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+       struct qed_dbg_feature *qed_feature =
+               &cdev->dbg_params.features[feature];
+       enum dbg_status dbg_rc;
+       struct qed_ptt *p_ptt;
+       int rc = 0;
+
+       /* Acquire ptt */
+       p_ptt = qed_ptt_acquire(p_hwfn);
+       if (!p_ptt)
+               return -EINVAL;
+
+       /* Get dump */
+       dbg_rc = qed_dbg_dump(p_hwfn, p_ptt, feature);
+       if (dbg_rc != DBG_STATUS_OK) {
+               DP_VERBOSE(cdev, QED_MSG_DEBUG, "%s\n",
+                          qed_dbg_get_status_str(dbg_rc));
+               *num_dumped_bytes = 0;
+               rc = -EINVAL;
+               goto out;
+       }
+
+       DP_VERBOSE(cdev, QED_MSG_DEBUG,
+                  "copying debugfs feature to external buffer\n");
+       memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
+       *num_dumped_bytes = qed_feature->dumped_dwords * sizeof(u32);
+
+out:
+       qed_ptt_release(p_hwfn, p_ptt);
+       return rc;
+}
+
+int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
+{
+       struct qed_hwfn *p_hwfn =
+               &cdev->hwfns[cdev->dbg_params.engine_for_debug];
+       struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+       struct qed_dbg_feature *qed_feature =
+               &cdev->dbg_params.features[feature];
+       u32 buf_size_dwords;
+       enum dbg_status rc;
+
+       if (!p_ptt)
+               return -EINVAL;
+
+       rc = qed_features_lookup[feature].get_size(p_hwfn, p_ptt,
+                                                  &buf_size_dwords);
+       if (rc != DBG_STATUS_OK)
+               buf_size_dwords = 0;
+
+       qed_ptt_release(p_hwfn, p_ptt);
+       qed_feature->buf_size = buf_size_dwords * sizeof(u32);
+       return qed_feature->buf_size;
+}
+
+u8 qed_get_debug_engine(struct qed_dev *cdev)
+{
+       return cdev->dbg_params.engine_for_debug;
+}
+
+void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
+{
+       DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
+                  engine_number);
+       cdev->dbg_params.engine_for_debug = engine_number;
+}
+
+void qed_dbg_pf_init(struct qed_dev *cdev)
+{
+       const u8 *dbg_values;
+
+       /* Debug values are after init values.
+        * The offset is the first dword of the file.
+        */
+       dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data;
+       qed_dbg_set_bin_ptr((u8 *)dbg_values);
+       qed_dbg_user_set_bin_ptr((u8 *)dbg_values);
+}
+
+void qed_dbg_pf_exit(struct qed_dev *cdev)
+{
+       struct qed_dbg_feature *feature = NULL;
+       enum qed_dbg_features feature_idx;
+
+       /* Debug features' buffers may still be allocated if a debug feature
+        * was dumped but its buffer was never released.
+        */
+       for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) {
+               feature = &cdev->dbg_params.features[feature_idx];
+               if (feature->dump_buf) {
+                       vfree(feature->dump_buf);
+                       feature->dump_buf = NULL;
+               }
+       }
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h
new file mode 100644 (file)
index 0000000..f872d73
--- /dev/null
@@ -0,0 +1,54 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_DEBUGFS_H
+#define _QED_DEBUGFS_H
+
+enum qed_dbg_features {
+       DBG_FEATURE_GRC,
+       DBG_FEATURE_IDLE_CHK,
+       DBG_FEATURE_MCP_TRACE,
+       DBG_FEATURE_REG_FIFO,
+       DBG_FEATURE_IGU_FIFO,
+       DBG_FEATURE_PROTECTION_OVERRIDE,
+       DBG_FEATURE_FW_ASSERTS,
+       DBG_FEATURE_NUM
+};
+
+int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes);
+int qed_dbg_grc_size(struct qed_dev *cdev);
+int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer,
+                    u32 *num_dumped_bytes);
+int qed_dbg_idle_chk_size(struct qed_dev *cdev);
+int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer,
+                    u32 *num_dumped_bytes);
+int qed_dbg_reg_fifo_size(struct qed_dev *cdev);
+int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer,
+                    u32 *num_dumped_bytes);
+int qed_dbg_igu_fifo_size(struct qed_dev *cdev);
+int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer,
+                               u32 *num_dumped_bytes);
+int qed_dbg_protection_override_size(struct qed_dev *cdev);
+int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer,
+                      u32 *num_dumped_bytes);
+int qed_dbg_fw_asserts_size(struct qed_dev *cdev);
+int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer,
+                     u32 *num_dumped_bytes);
+int qed_dbg_mcp_trace_size(struct qed_dev *cdev);
+int qed_dbg_all_data(struct qed_dev *cdev, void *buffer);
+int qed_dbg_all_data_size(struct qed_dev *cdev);
+u8 qed_get_debug_engine(struct qed_dev *cdev);
+void qed_set_debug_engine(struct qed_dev *cdev, int engine_number);
+int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
+                   enum qed_dbg_features feature, u32 *num_dumped_bytes);
+int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature);
+
+void qed_dbg_pf_init(struct qed_dev *cdev);
+void qed_dbg_pf_exit(struct qed_dev *cdev);
+
+#endif
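
[Reviewer note: a sketch of how a debugfs/ethtool-style consumer would drive the combined dump declared in this header. Hypothetical caller, not part of the patch; it relies only on the two qed_dbg_all_data* prototypes above.]

static int example_all_data(struct qed_dev *cdev)
{
	int len = qed_dbg_all_data_size(cdev);
	void *buf;
	int rc;

	if (len <= 0)
		return -ENODATA;

	buf = vzalloc(len);
	if (!buf)
		return -ENOMEM;

	rc = qed_dbg_all_data(cdev, buf);
	/* ... on success, hand 'len' bytes to the consumer, e.g. get-regs ... */
	vfree(buf);
	return rc;
}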
index 8117ddf..13d8b40 100644 (file)
@@ -340,7 +340,6 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable)
        return 0;
 
 alloc_err:
-       DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n");
        qed_qm_info_free(p_hwfn);
        return -ENOMEM;
 }
@@ -424,18 +423,12 @@ int qed_resc_alloc(struct qed_dev *cdev)
                                     RESC_NUM(p_hwfn, QED_L2_QUEUE);
 
                p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL);
-               if (!p_hwfn->p_tx_cids) {
-                       DP_NOTICE(p_hwfn,
-                                 "Failed to allocate memory for Tx Cids\n");
+               if (!p_hwfn->p_tx_cids)
                        goto alloc_no_mem;
-               }
 
                p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL);
-               if (!p_hwfn->p_rx_cids) {
-                       DP_NOTICE(p_hwfn,
-                                 "Failed to allocate memory for Rx Cids\n");
+               if (!p_hwfn->p_rx_cids)
                        goto alloc_no_mem;
-               }
        }
 
        for_each_hwfn(cdev, i) {
@@ -522,26 +515,18 @@ int qed_resc_alloc(struct qed_dev *cdev)
 
                /* DMA info initialization */
                rc = qed_dmae_info_alloc(p_hwfn);
-               if (rc) {
-                       DP_NOTICE(p_hwfn,
-                                 "Failed to allocate memory for dmae_info structure\n");
+               if (rc)
                        goto alloc_err;
-               }
 
                /* DCBX initialization */
                rc = qed_dcbx_info_alloc(p_hwfn);
-               if (rc) {
-                       DP_NOTICE(p_hwfn,
-                                 "Failed to allocate memory for dcbx structure\n");
+               if (rc)
                        goto alloc_err;
-               }
        }
 
        cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
-       if (!cdev->reset_stats) {
-               DP_NOTICE(cdev, "Failed to allocate reset statistics\n");
+       if (!cdev->reset_stats)
                goto alloc_no_mem;
-       }
 
        return 0;
 
@@ -772,6 +757,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
                concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id);
                qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid);
                qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1);
+               qed_wr(p_hwfn, p_ptt, CCFC_REG_WEAK_ENABLE_VF, 0x0);
+               qed_wr(p_hwfn, p_ptt, TCFC_REG_STRONG_ENABLE_VF, 0x1);
+               qed_wr(p_hwfn, p_ptt, TCFC_REG_WEAK_ENABLE_VF, 0x0);
        }
        /* pretend to original PF */
        qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
@@ -782,34 +770,8 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
                            struct qed_ptt *p_ptt, int hw_mode)
 {
-       int rc = 0;
-
-       rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
-       if (rc)
-               return rc;
-
-       if (hw_mode & (1 << MODE_MF_SI)) {
-               u8 pf_id = 0;
-
-               if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) {
-                       DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
-                                  "PF[%08x] is first eth on engine\n", pf_id);
-
-                       /* We should have configured BIT for ppfid, i.e., the
-                        * relative function number in the port. But there's a
-                        * bug in LLH in BB where the ppfid is actually engine
-                        * based, so we need to take this into account.
-                        */
-                       qed_wr(p_hwfn, p_ptt,
-                              NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR, 1 << pf_id);
-               }
-
-               /* Take the protocol-based hit vector if there is a hit,
-                * otherwise take the other vector.
-                */
-               qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_CLS_TYPE_DUALMODE, 0x2);
-       }
-       return rc;
+       return qed_init_run(p_hwfn, p_ptt, PHASE_PORT,
+                           p_hwfn->port_id, hw_mode);
 }
 
 static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
@@ -878,21 +840,6 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
        /* Pure runtime initializations - directly to the HW  */
        qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true);
 
-       if (hw_mode & (1 << MODE_MF_SI)) {
-               u8 pf_id = 0;
-               u32 val = 0;
-
-               if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) {
-                       if (p_hwfn->rel_pf_id == pf_id) {
-                               DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
-                                          "PF[%d] is first ETH on engine\n",
-                                          pf_id);
-                               val = 1;
-                       }
-                       qed_wr(p_hwfn, p_ptt, PRS_REG_MSG_INFO, val);
-               }
-       }
-
        if (b_hw_start) {
                /* enable interrupts */
                qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
@@ -1751,10 +1698,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 
        /* Allocate PTT pool */
        rc = qed_ptt_pool_alloc(p_hwfn);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to prepare hwfn's hw\n");
+       if (rc)
                goto err0;
-       }
 
        /* Allocate the main PTT */
        p_hwfn->p_main_ptt = qed_get_reserved_ptt(p_hwfn, RESERVED_PTT_MAIN);
@@ -1784,10 +1729,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 
        /* Allocate the init RT array and initialize the init-ops engine */
        rc = qed_init_alloc(p_hwfn);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to allocate the init array\n");
+       if (rc)
                goto err2;
-       }
 
        return rc;
 err2:
@@ -1995,10 +1938,8 @@ qed_chain_alloc_next_ptr(struct qed_dev *cdev, struct qed_chain *p_chain)
                p_virt = dma_alloc_coherent(&cdev->pdev->dev,
                                            QED_CHAIN_PAGE_SIZE,
                                            &p_phys, GFP_KERNEL);
-               if (!p_virt) {
-                       DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+               if (!p_virt)
                        return -ENOMEM;
-               }
 
                if (i == 0) {
                        qed_chain_init_mem(p_chain, p_virt, p_phys);
@@ -2028,10 +1969,8 @@ qed_chain_alloc_single(struct qed_dev *cdev, struct qed_chain *p_chain)
 
        p_virt = dma_alloc_coherent(&cdev->pdev->dev,
                                    QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL);
-       if (!p_virt) {
-               DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+       if (!p_virt)
                return -ENOMEM;
-       }
 
        qed_chain_init_mem(p_chain, p_virt, p_phys);
        qed_chain_reset(p_chain);
@@ -2048,13 +1987,9 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
        void *p_virt = NULL;
 
        size = page_cnt * sizeof(*pp_virt_addr_tbl);
-       pp_virt_addr_tbl = vmalloc(size);
-       if (!pp_virt_addr_tbl) {
-               DP_NOTICE(cdev,
-                         "Failed to allocate memory for the chain virtual addresses table\n");
+       pp_virt_addr_tbl = vzalloc(size);
+       if (!pp_virt_addr_tbl)
                return -ENOMEM;
-       }
-       memset(pp_virt_addr_tbl, 0, size);
 
        /* The allocation of the PBL table is done with its full size, since it
         * is expected to be successive.
@@ -2067,19 +2002,15 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
                                        size, &p_pbl_phys, GFP_KERNEL);
        qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys,
                               pp_virt_addr_tbl);
-       if (!p_pbl_virt) {
-               DP_NOTICE(cdev, "Failed to allocate chain pbl memory\n");
+       if (!p_pbl_virt)
                return -ENOMEM;
-       }
 
        for (i = 0; i < page_cnt; i++) {
                p_virt = dma_alloc_coherent(&cdev->pdev->dev,
                                            QED_CHAIN_PAGE_SIZE,
                                            &p_phys, GFP_KERNEL);
-               if (!p_virt) {
-                       DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+               if (!p_virt)
                        return -ENOMEM;
-               }
 
                if (i == 0) {
                        qed_chain_init_mem(p_chain, p_virt, p_phys);
@@ -2114,7 +2045,8 @@ int qed_chain_alloc(struct qed_dev *cdev,
        rc = qed_chain_alloc_sanity_check(cdev, cnt_type, elem_size, page_cnt);
        if (rc) {
                DP_NOTICE(cdev,
-                         "Cannot allocate a chain with the given arguments:\n"
+                         "Cannot allocate a chain with the given arguments:\n");
+               DP_NOTICE(cdev,
                          "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu]\n",
                          intended_use, mode, cnt_type, num_elems, elem_size);
                return rc;
index 6f9d3b8..2777d5b 100644 (file)
@@ -536,6 +536,244 @@ struct core_conn_context {
        struct regpair ustorm_st_padding[2];
 };
 
+enum core_error_handle {
+       LL2_DROP_PACKET,
+       LL2_DO_NOTHING,
+       LL2_ASSERT,
+       MAX_CORE_ERROR_HANDLE
+};
+
+enum core_event_opcode {
+       CORE_EVENT_TX_QUEUE_START,
+       CORE_EVENT_TX_QUEUE_STOP,
+       CORE_EVENT_RX_QUEUE_START,
+       CORE_EVENT_RX_QUEUE_STOP,
+       MAX_CORE_EVENT_OPCODE
+};
+
+enum core_l4_pseudo_checksum_mode {
+       CORE_L4_PSEUDO_CSUM_CORRECT_LENGTH,
+       CORE_L4_PSEUDO_CSUM_ZERO_LENGTH,
+       MAX_CORE_L4_PSEUDO_CHECKSUM_MODE
+};
+
+struct core_ll2_port_stats {
+       struct regpair gsi_invalid_hdr;
+       struct regpair gsi_invalid_pkt_length;
+       struct regpair gsi_unsupported_pkt_typ;
+       struct regpair gsi_crcchksm_error;
+};
+
+struct core_ll2_pstorm_per_queue_stat {
+       struct regpair sent_ucast_bytes;
+       struct regpair sent_mcast_bytes;
+       struct regpair sent_bcast_bytes;
+       struct regpair sent_ucast_pkts;
+       struct regpair sent_mcast_pkts;
+       struct regpair sent_bcast_pkts;
+};
+
+struct core_ll2_rx_prod {
+       __le16 bd_prod;
+       __le16 cqe_prod;
+       __le32 reserved;
+};
+
+struct core_ll2_tstorm_per_queue_stat {
+       struct regpair packet_too_big_discard;
+       struct regpair no_buff_discard;
+};
+
+struct core_ll2_ustorm_per_queue_stat {
+       struct regpair rcv_ucast_bytes;
+       struct regpair rcv_mcast_bytes;
+       struct regpair rcv_bcast_bytes;
+       struct regpair rcv_ucast_pkts;
+       struct regpair rcv_mcast_pkts;
+       struct regpair rcv_bcast_pkts;
+};
+
+enum core_ramrod_cmd_id {
+       CORE_RAMROD_UNUSED,
+       CORE_RAMROD_RX_QUEUE_START,
+       CORE_RAMROD_TX_QUEUE_START,
+       CORE_RAMROD_RX_QUEUE_STOP,
+       CORE_RAMROD_TX_QUEUE_STOP,
+       MAX_CORE_RAMROD_CMD_ID
+};
+
+enum core_roce_flavor_type {
+       CORE_ROCE,
+       CORE_RROCE,
+       MAX_CORE_ROCE_FLAVOR_TYPE
+};
+
+struct core_rx_action_on_error {
+       u8 error_type;
+#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_MASK    0x3
+#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_SHIFT 0
+#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_MASK   0x3
+#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_SHIFT  2
+#define CORE_RX_ACTION_ON_ERROR_RESERVED_MASK  0xF
+#define CORE_RX_ACTION_ON_ERROR_RESERVED_SHIFT 4
+};
+
+struct core_rx_bd {
+       struct regpair addr;
+       __le16 reserved[4];
+};
+
+struct core_rx_bd_with_buff_len {
+       struct regpair addr;
+       __le16 buff_length;
+       __le16 reserved[3];
+};
+
+union core_rx_bd_union {
+       struct core_rx_bd rx_bd;
+       struct core_rx_bd_with_buff_len rx_bd_with_len;
+};
+
+struct core_rx_cqe_opaque_data {
+       __le32 data[2];
+};
+
+enum core_rx_cqe_type {
+       CORE_RX_CQE_ILLIGAL_TYPE,
+       CORE_RX_CQE_TYPE_REGULAR,
+       CORE_RX_CQE_TYPE_GSI_OFFLOAD,
+       CORE_RX_CQE_TYPE_SLOW_PATH,
+       MAX_CORE_RX_CQE_TYPE
+};
+
+struct core_rx_fast_path_cqe {
+       u8 type;
+       u8 placement_offset;
+       struct parsing_and_err_flags parse_flags;
+       __le16 packet_length;
+       __le16 vlan;
+       struct core_rx_cqe_opaque_data opaque_data;
+       __le32 reserved[4];
+};
+
+struct core_rx_gsi_offload_cqe {
+       u8 type;
+       u8 data_length_error;
+       struct parsing_and_err_flags parse_flags;
+       __le16 data_length;
+       __le16 vlan;
+       __le32 src_mac_addrhi;
+       __le16 src_mac_addrlo;
+       u8 reserved1[2];
+       __le32 gid_dst[4];
+};
+
+struct core_rx_slow_path_cqe {
+       u8 type;
+       u8 ramrod_cmd_id;
+       __le16 echo;
+       __le32 reserved1[7];
+};
+
+union core_rx_cqe_union {
+       struct core_rx_fast_path_cqe rx_cqe_fp;
+       struct core_rx_gsi_offload_cqe rx_cqe_gsi;
+       struct core_rx_slow_path_cqe rx_cqe_sp;
+};
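
[Reviewer note: every CQE variant in the union starts with the same type byte, so a completion handler can dispatch before picking a member. A sketch of such a handler (hypothetical; type values come from enum core_rx_cqe_type above).]

static void example_handle_cqe(union core_rx_cqe_union *cqe)
{
	switch (cqe->rx_cqe_fp.type) {
	case CORE_RX_CQE_TYPE_REGULAR:
		/* fast path: parse_flags/packet_length/vlan are valid */
		break;
	case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
		/* GSI offload metadata (gid_dst, src MAC) is valid */
		break;
	case CORE_RX_CQE_TYPE_SLOW_PATH:
		/* ramrod completion echoed back on the Rx ring */
		break;
	default:
		/* CORE_RX_CQE_ILLIGAL_TYPE or unknown: drop */
		break;
	}
}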
+
+struct core_rx_start_ramrod_data {
+       struct regpair bd_base;
+       struct regpair cqe_pbl_addr;
+       __le16 mtu;
+       __le16 sb_id;
+       u8 sb_index;
+       u8 complete_cqe_flg;
+       u8 complete_event_flg;
+       u8 drop_ttl0_flg;
+       __le16 num_of_pbl_pages;
+       u8 inner_vlan_removal_en;
+       u8 queue_id;
+       u8 main_func_queue;
+       u8 mf_si_bcast_accept_all;
+       u8 mf_si_mcast_accept_all;
+       struct core_rx_action_on_error action_on_error;
+       u8 gsi_offload_flag;
+       u8 reserved[7];
+};
+
+struct core_rx_stop_ramrod_data {
+       u8 complete_cqe_flg;
+       u8 complete_event_flg;
+       u8 queue_id;
+       u8 reserved1;
+       __le16 reserved2[2];
+};
+
+struct core_tx_bd_flags {
+       u8 as_bitfield;
+#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_MASK  0x1
+#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0
+#define CORE_TX_BD_FLAGS_VLAN_INSERTION_MASK   0x1
+#define CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT  1
+#define CORE_TX_BD_FLAGS_START_BD_MASK 0x1
+#define CORE_TX_BD_FLAGS_START_BD_SHIFT        2
+#define CORE_TX_BD_FLAGS_IP_CSUM_MASK  0x1
+#define CORE_TX_BD_FLAGS_IP_CSUM_SHIFT 3
+#define CORE_TX_BD_FLAGS_L4_CSUM_MASK  0x1
+#define CORE_TX_BD_FLAGS_L4_CSUM_SHIFT 4
+#define CORE_TX_BD_FLAGS_IPV6_EXT_MASK 0x1
+#define CORE_TX_BD_FLAGS_IPV6_EXT_SHIFT        5
+#define CORE_TX_BD_FLAGS_L4_PROTOCOL_MASK      0x1
+#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT     6
+#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK      0x1
+#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7
+};
+
+struct core_tx_bd {
+       struct regpair addr;
+       __le16 nbytes;
+       __le16 nw_vlan_or_lb_echo;
+       u8 bitfield0;
+#define CORE_TX_BD_NBDS_MASK   0xF
+#define CORE_TX_BD_NBDS_SHIFT  0
+#define CORE_TX_BD_ROCE_FLAV_MASK      0x1
+#define CORE_TX_BD_ROCE_FLAV_SHIFT     4
+#define CORE_TX_BD_RESERVED0_MASK      0x7
+#define CORE_TX_BD_RESERVED0_SHIFT     5
+       struct core_tx_bd_flags bd_flags;
+       __le16 bitfield1;
+#define CORE_TX_BD_L4_HDR_OFFSET_W_MASK        0x3FFF
+#define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0
+#define CORE_TX_BD_TX_DST_MASK 0x1
+#define CORE_TX_BD_TX_DST_SHIFT        14
+#define CORE_TX_BD_RESERVED1_MASK      0x1
+#define CORE_TX_BD_RESERVED1_SHIFT     15
+};
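
[Reviewer note: the MASK/SHIFT pairs above follow the naming convention consumed by the GET_FIELD()/SET_FIELD() helpers used throughout qed's common HSI header. A sketch of filling a Tx BD's packed fields that way, with hypothetical values.]

	struct core_tx_bd bd = {};

	SET_FIELD(bd.bitfield0, CORE_TX_BD_NBDS, 2);	/* packet spans 2 BDs */
	SET_FIELD(bd.bitfield0, CORE_TX_BD_ROCE_FLAV, CORE_ROCE);
	SET_FIELD(bd.bd_flags.as_bitfield, CORE_TX_BD_FLAGS_START_BD, 1);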
+
+enum core_tx_dest {
+       CORE_TX_DEST_NW,
+       CORE_TX_DEST_LB,
+       MAX_CORE_TX_DEST
+};
+
+struct core_tx_start_ramrod_data {
+       struct regpair pbl_base_addr;
+       __le16 mtu;
+       __le16 sb_id;
+       u8 sb_index;
+       u8 stats_en;
+       u8 stats_id;
+       u8 conn_type;
+       __le16 pbl_size;
+       __le16 qm_pq_id;
+       u8 gsi_offload_flag;
+       u8 resrved[3];
+};
+
+struct core_tx_stop_ramrod_data {
+       __le32 reserved0[2];
+};
+
 struct eth_mstorm_per_pf_stat {
        struct regpair gre_discard_pkts;
        struct regpair vxlan_discard_pkts;
@@ -636,9 +874,33 @@ struct hsi_fp_ver_struct {
 };
 
 /* Mstorm non-triggering VF zone */
+enum malicious_vf_error_id {
+       MALICIOUS_VF_NO_ERROR,
+       VF_PF_CHANNEL_NOT_READY,
+       VF_ZONE_MSG_NOT_VALID,
+       VF_ZONE_FUNC_NOT_ENABLED,
+       ETH_PACKET_TOO_SMALL,
+       ETH_ILLEGAL_VLAN_MODE,
+       ETH_MTU_VIOLATION,
+       ETH_ILLEGAL_INBAND_TAGS,
+       ETH_VLAN_INSERT_AND_INBAND_VLAN,
+       ETH_ILLEGAL_NBDS,
+       ETH_FIRST_BD_WO_SOP,
+       ETH_INSUFFICIENT_BDS,
+       ETH_ILLEGAL_LSO_HDR_NBDS,
+       ETH_ILLEGAL_LSO_MSS,
+       ETH_ZERO_SIZE_BD,
+       ETH_ILLEGAL_LSO_HDR_LEN,
+       ETH_INSUFFICIENT_PAYLOAD,
+       ETH_EDPM_OUT_OF_SYNC,
+       ETH_TUNN_IPV6_EXT_NBD_ERR,
+       ETH_CONTROL_PACKET_VIOLATION,
+       MAX_MALICIOUS_VF_ERROR_ID
+};
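
[Reviewer note: a hypothetical logging aid, not in the patch: an ID-to-string table for the enum above, handy when reporting a flagged VF. The table must be kept in sync with the enum; most entries are omitted in this sketch.]

static const char * const malicious_vf_err_str[MAX_MALICIOUS_VF_ERROR_ID] = {
	[MALICIOUS_VF_NO_ERROR]		= "no error",
	[VF_PF_CHANNEL_NOT_READY]	= "VF-PF channel not ready",
	[ETH_PACKET_TOO_SMALL]		= "runt packet",
	[ETH_MTU_VIOLATION]		= "MTU violation",
	/* entries for the remaining IDs omitted in this sketch */
};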
+
 struct mstorm_non_trigger_vf_zone {
        struct eth_mstorm_per_queue_stat eth_queue_stat;
-       struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF];
+       struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD];
 };
 
 /* Mstorm VF zone */
@@ -705,13 +967,17 @@ struct pf_start_ramrod_data {
 
 struct protocol_dcb_data {
        u8 dcb_enable_flag;
+       u8 reserved_a;
        u8 dcb_priority;
        u8 dcb_tc;
-       u8 reserved;
+       u8 reserved_b;
+       u8 reserved0;
 };
 
 struct pf_update_tunnel_config {
        u8 update_rx_pf_clss;
+       u8 update_rx_def_ucast_clss;
+       u8 update_rx_def_non_ucast_clss;
        u8 update_tx_pf_clss;
        u8 set_vxlan_udp_port_flg;
        u8 set_geneve_udp_port_flg;
@@ -727,7 +993,7 @@ struct pf_update_tunnel_config {
        u8 tunnel_clss_ipgre;
        __le16 vxlan_udp_port;
        __le16 geneve_udp_port;
-       __le16 reserved[3];
+       __le16 reserved[2];
 };
 
 struct pf_update_ramrod_data {
@@ -736,16 +1002,17 @@ struct pf_update_ramrod_data {
        u8 update_fcoe_dcb_data_flag;
        u8 update_iscsi_dcb_data_flag;
        u8 update_roce_dcb_data_flag;
+       u8 update_rroce_dcb_data_flag;
        u8 update_iwarp_dcb_data_flag;
        u8 update_mf_vlan_flag;
-       u8 reserved;
        struct protocol_dcb_data eth_dcb_data;
        struct protocol_dcb_data fcoe_dcb_data;
        struct protocol_dcb_data iscsi_dcb_data;
        struct protocol_dcb_data roce_dcb_data;
+       struct protocol_dcb_data rroce_dcb_data;
        struct protocol_dcb_data iwarp_dcb_data;
        __le16 mf_vlan;
-       __le16 reserved2;
+       __le16 reserved;
        struct pf_update_tunnel_config tunnel_config;
 };
 
@@ -766,10 +1033,14 @@ enum protocol_version_array_key {
        MAX_PROTOCOL_VERSION_ARRAY_KEY
 };
 
-/* Pstorm non-triggering VF zone */
+struct rdma_sent_stats {
+       struct regpair sent_bytes;
+       struct regpair sent_pkts;
+};
+
 struct pstorm_non_trigger_vf_zone {
        struct eth_pstorm_per_queue_stat eth_queue_stat;
-       struct regpair reserved[2];
+       struct rdma_sent_stats rdma_stats;
 };
 
 /* Pstorm VF zone */
@@ -786,7 +1057,11 @@ struct ramrod_header {
        __le16 echo;
 };
 
-/* Slowpath Element (SPQE) */
+struct rdma_rcv_stats {
+       struct regpair rcv_bytes;
+       struct regpair rcv_pkts;
+};
+
 struct slow_path_element {
        struct ramrod_header hdr;
        struct regpair data_ptr;
@@ -794,7 +1069,7 @@ struct slow_path_element {
 
 /* Tstorm non-triggering VF zone */
 struct tstorm_non_trigger_vf_zone {
-       struct regpair reserved[2];
+       struct rdma_rcv_stats rdma_stats;
 };
 
 struct tstorm_per_port_stat {
@@ -802,9 +1077,14 @@ struct tstorm_per_port_stat {
        struct regpair mac_error_discard;
        struct regpair mftag_filter_discard;
        struct regpair eth_mac_filter_discard;
-       struct regpair reserved[5];
+       struct regpair ll2_mac_filter_discard;
+       struct regpair ll2_conn_disabled_discard;
+       struct regpair iscsi_irregular_pkt;
+       struct regpair reserved;
+       struct regpair roce_irregular_pkt;
        struct regpair eth_irregular_pkt;
-       struct regpair reserved1[2];
+       struct regpair reserved1;
+       struct regpair preroce_irregular_pkt;
        struct regpair eth_gre_tunn_filter_discard;
        struct regpair eth_vxlan_tunn_filter_discard;
        struct regpair eth_geneve_tunn_filter_discard;
@@ -870,7 +1150,13 @@ struct vf_stop_ramrod_data {
        __le32 reserved2;
 };
 
-/* Attentions status block */
+enum vf_zone_size_mode {
+       VF_ZONE_SIZE_MODE_DEFAULT,
+       VF_ZONE_SIZE_MODE_DOUBLE,
+       VF_ZONE_SIZE_MODE_QUAD,
+       MAX_VF_ZONE_SIZE_MODE
+};
+
 struct atten_status_block {
        __le32 atten_bits;
        __le32 atten_ack;
@@ -1442,13 +1728,6 @@ enum bin_dbg_buffer_type {
        MAX_BIN_DBG_BUFFER_TYPE
 };
 
-/* Chip IDs */
-enum chip_ids {
-       CHIP_RESERVED,
-       CHIP_BB_B0,
-       CHIP_RESERVED2,
-       MAX_CHIP_IDS
-};
 
 /* Attention bit mapping */
 struct dbg_attn_bit_mapping {
@@ -1527,6 +1806,371 @@ enum dbg_attn_type {
        MAX_DBG_ATTN_TYPE
 };
 
+/* condition header for registers dump */
+struct dbg_dump_cond_hdr {
+       struct dbg_mode_hdr mode; /* Mode header */
+       u8 block_id; /* block ID */
+       u8 data_size; /* size in dwords of the data following this header */
+};
+
+/* memory data for registers dump */
+struct dbg_dump_mem {
+       __le32 dword0;
+#define DBG_DUMP_MEM_ADDRESS_MASK       0xFFFFFF
+#define DBG_DUMP_MEM_ADDRESS_SHIFT      0
+#define DBG_DUMP_MEM_MEM_GROUP_ID_MASK  0xFF
+#define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT 24
+       __le32 dword1;
+#define DBG_DUMP_MEM_LENGTH_MASK        0xFFFFFF
+#define DBG_DUMP_MEM_LENGTH_SHIFT       0
+#define DBG_DUMP_MEM_RESERVED_MASK      0xFF
+#define DBG_DUMP_MEM_RESERVED_SHIFT     24
+};
+
+/* register data for registers dump */
+struct dbg_dump_reg {
+       __le32 data;
+#define DBG_DUMP_REG_ADDRESS_MASK  0xFFFFFF /* register address (in dwords) */
+#define DBG_DUMP_REG_ADDRESS_SHIFT 0
+#define DBG_DUMP_REG_LENGTH_MASK   0xFF /* register size (in dwords) */
+#define DBG_DUMP_REG_LENGTH_SHIFT  24
+};
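
[Reviewer note: a sketch of decoding one such register entry while walking a GRC dump, assuming the common GET_FIELD() helper; per the comments above, both the address and the length are in dwords.]

static void example_decode_dump_reg(const struct dbg_dump_reg *reg)
{
	u32 entry = le32_to_cpu(reg->data);
	u32 grc_addr = GET_FIELD(entry, DBG_DUMP_REG_ADDRESS);	/* dwords */
	u32 len = GET_FIELD(entry, DBG_DUMP_REG_LENGTH);	/* dwords */

	pr_debug("GRC reg block at dword 0x%06x, %u dwords\n", grc_addr, len);
}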
+
+/* split header for registers dump */
+struct dbg_dump_split_hdr {
+       __le32 hdr;
+#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK      0xFFFFFF
+#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT     0
+#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK  0xFF
+#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_SHIFT 24
+};
+
+/* condition header for idle check */
+struct dbg_idle_chk_cond_hdr {
+       struct dbg_mode_hdr mode; /* Mode header */
+       __le16 data_size; /* size in dwords of the data following this header */
+};
+
+/* Idle Check condition register */
+struct dbg_idle_chk_cond_reg {
+       __le32 data;
+#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK  0xFF
+#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24
+       __le16 num_entries; /* number of registers entries to check */
+       u8 entry_size; /* size of registers entry (in dwords) */
+       u8 start_entry; /* index of the first entry to check */
+};
+
+/* Idle Check info register */
+struct dbg_idle_chk_info_reg {
+       __le32 data;
+#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK  0xFF
+#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24
+       __le16 size; /* register size in dwords */
+       struct dbg_mode_hdr mode; /* Mode header */
+};
+
+/* Idle Check register */
+union dbg_idle_chk_reg {
+       struct dbg_idle_chk_cond_reg cond_reg; /* condition register */
+       struct dbg_idle_chk_info_reg info_reg; /* info register */
+};
+
+/* Idle Check result header */
+struct dbg_idle_chk_result_hdr {
+       __le16 rule_id; /* Failing rule index */
+       __le16 mem_entry_id; /* Failing memory entry index */
+       u8 num_dumped_cond_regs; /* number of dumped condition registers */
+       u8 num_dumped_info_regs; /* number of dumped info registers */
+       u8 severity; /* from dbg_idle_chk_severity_types enum */
+       u8 reserved;
+};
+
+/* Idle Check result register header */
+struct dbg_idle_chk_result_reg_hdr {
+       u8 data;
+#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_MASK  0x1
+#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_SHIFT 0
+#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK  0x7F
+#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1
+       u8 start_entry; /* index of the first checked entry */
+       __le16 size; /* register size in dwords */
+};
+
+/* Idle Check rule */
+struct dbg_idle_chk_rule {
+       __le16 rule_id; /* Idle Check rule ID */
+       u8 severity; /* value from dbg_idle_chk_severity_types enum */
+       u8 cond_id; /* Condition ID */
+       u8 num_cond_regs; /* number of condition registers */
+       u8 num_info_regs; /* number of info registers */
+       u8 num_imms; /* number of immediates in the condition */
+       u8 reserved1;
+       __le16 reg_offset; /* offset of this rule's registers in the idle check
+                           * register array (in dbg_idle_chk_reg units).
+                           */
+       __le16 imm_offset; /* offset of this rule's immediate values in the
+                           * immediate values array (in dwords).
+                           */
+};
+
+/* Idle Check rule parsing data */
+struct dbg_idle_chk_rule_parsing_data {
+       __le32 data;
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK  0x1
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT 0
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK  0x7FFFFFFF
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_SHIFT 1
+};
+
+/* idle check severity types */
+enum dbg_idle_chk_severity_types {
+       /* idle check failure should cause an error */
+       IDLE_CHK_SEVERITY_ERROR,
+       /* idle check failure should cause an error only if there's no traffic */
+       IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC,
+       /* idle check failure should cause a warning */
+       IDLE_CHK_SEVERITY_WARNING,
+       MAX_DBG_IDLE_CHK_SEVERITY_TYPES
+};
+
+/* Debug Bus block data */
+struct dbg_bus_block_data {
+       u8 enabled; /* Indicates if the block is enabled for recording (0/1) */
+       u8 hw_id; /* HW ID associated with the block */
+       u8 line_num; /* Debug line number to select */
+       u8 right_shift; /* Number of units to right-shift the debug data (0-3) */
+       u8 cycle_en; /* 4-bit value: bit i set -> unit i is enabled. */
+       u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */
+       u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced.
+                        */
+       u8 reserved;
+};
+
+/* Debug Bus Clients */
+enum dbg_bus_clients {
+       DBG_BUS_CLIENT_RBCN,
+       DBG_BUS_CLIENT_RBCP,
+       DBG_BUS_CLIENT_RBCR,
+       DBG_BUS_CLIENT_RBCT,
+       DBG_BUS_CLIENT_RBCU,
+       DBG_BUS_CLIENT_RBCF,
+       DBG_BUS_CLIENT_RBCX,
+       DBG_BUS_CLIENT_RBCS,
+       DBG_BUS_CLIENT_RBCH,
+       DBG_BUS_CLIENT_RBCZ,
+       DBG_BUS_CLIENT_OTHER_ENGINE,
+       DBG_BUS_CLIENT_TIMESTAMP,
+       DBG_BUS_CLIENT_CPU,
+       DBG_BUS_CLIENT_RBCY,
+       DBG_BUS_CLIENT_RBCQ,
+       DBG_BUS_CLIENT_RBCM,
+       DBG_BUS_CLIENT_RBCB,
+       DBG_BUS_CLIENT_RBCW,
+       DBG_BUS_CLIENT_RBCV,
+       MAX_DBG_BUS_CLIENTS
+};
+
+/* Debug Bus memory address */
+struct dbg_bus_mem_addr {
+       __le32 lo;
+       __le32 hi;
+};
+
+/* Debug Bus PCI buffer data */
+struct dbg_bus_pci_buf_data {
+       struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */
+       struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */
+       __le32 size; /* PCI buffer size in bytes */
+};
+
+/* Debug Bus Storm EID range filter params */
+struct dbg_bus_storm_eid_range_params {
+       u8 min; /* Minimal event ID to filter on */
+       u8 max; /* Maximal event ID to filter on */
+};
+
+/* Debug Bus Storm EID mask filter params */
+struct dbg_bus_storm_eid_mask_params {
+       u8 val; /* Event ID value */
+       u8 mask; /* Event ID mask. 1s in the mask = don't-care bits. */
+};
+
+/* Debug Bus Storm EID filter params */
+union dbg_bus_storm_eid_params {
+       struct dbg_bus_storm_eid_range_params range;
+       struct dbg_bus_storm_eid_mask_params mask;
+};
+
+/* Debug Bus Storm data */
+struct dbg_bus_storm_data {
+       u8 fast_enabled;
+       u8 fast_mode;
+       u8 slow_enabled;
+       u8 slow_mode;
+       u8 hw_id;
+       u8 eid_filter_en;
+       u8 eid_range_not_mask;
+       u8 cid_filter_en;
+       union dbg_bus_storm_eid_params eid_filter_params;
+       __le16 reserved;
+       __le32 cid;
+};
+
+/* Debug Bus data */
+struct dbg_bus_data {
+       __le32 app_version; /* The tools version number of the application */
+       u8 state; /* The current debug bus state */
+       u8 hw_dwords; /* HW dwords per cycle */
+       u8 next_hw_id; /* Next HW ID to be associated with an input */
+       u8 num_enabled_blocks; /* Number of blocks enabled for recording */
+       u8 num_enabled_storms; /* Number of Storms enabled for recording */
+       u8 target; /* Output target */
+       u8 next_trigger_state; /* ID of next trigger state to be added */
+       u8 next_constraint_id; /* ID of next filter/trigger constraint to be
+                               * added.
+                               */
+       u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */
+       u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */
+       u8 timestamp_input_en; /* Indicates if timestamp recording is enabled
+                               * (0/1).
+                               */
+       u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */
+       u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */
+       u8 adding_filter; /* If true, the next added constraint belongs to the
+                          * filter. Otherwise, it belongs to the last added
+                          * trigger state. Valid only if either the filter or
+                          * the triggers are enabled.
+                          */
+       u8 filter_pre_trigger; /* Indicates if the recording filter should be
+                               * applied before the trigger. Valid only if both
+                               * filter and trigger are enabled (0/1).
+                               */
+       u8 filter_post_trigger; /* Indicates if the recording filter should be
+                                * applied after the trigger. Valid only if both
+                                * filter and trigger are enabled (0/1).
+                                */
+       u8 unify_inputs; /* If true, all inputs are associated with HW ID 0.
+                         * Otherwise, each input is assigned a different HW ID
+                         * (0/1).
+                         */
+       u8 rcv_from_other_engine; /* Indicates if the other engine sends its NW
+                                  * recording to this engine (0/1).
+                                  */
+       struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid
+                                             * only when the target is
+                                             * DBG_BUS_TARGET_ID_PCI.
+                                             */
+       __le16 reserved;
+       struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */
+       struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each Storm */
+};
+
+/* Debug bus frame modes */
+enum dbg_bus_frame_modes {
+       DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */
+       DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */
+       DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */
+       MAX_DBG_BUS_FRAME_MODES
+};
+
+/* Debug bus states */
+enum dbg_bus_states {
+       DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */
+       DBG_BUS_STATE_READY, /* debug bus is ready for configuration and
+                             * recording.
+                             */
+       DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */
+       DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */
+       MAX_DBG_BUS_STATES
+};
+
+/* Debug bus target IDs */
+enum dbg_bus_targets {
+       /* records debug bus to DBG block internal buffer */
+       DBG_BUS_TARGET_ID_INT_BUF,
+       /* records debug bus to the NW */
+       DBG_BUS_TARGET_ID_NIG,
+       /* records debug bus to a PCI buffer */
+       DBG_BUS_TARGET_ID_PCI,
+       MAX_DBG_BUS_TARGETS
+};
+
+/* GRC Dump data */
+struct dbg_grc_data {
+       __le32 param_val[40]; /* Value of each GRC parameter. Array size must
+                              * match the enum dbg_grc_params.
+                              */
+       u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was
+                                  * set by the user (0/1). Array size must
+                                  * match the enum dbg_grc_params.
+                                  */
+};
+
+/* Debug GRC params */
+enum dbg_grc_params {
+       DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */
+       DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */
+       DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */
+       DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */
+       DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */
+       DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */
+       DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */
+       DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */
+       DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */
+       DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */
+       DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */
+       DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */
+       DBG_GRC_PARAM_RESERVED, /* reserved */
+       DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */
+       DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */
+       DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */
+       DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */
+       DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */
+       DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */
+       DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */
+       DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */
+       DBG_GRC_PARAM_DUMP_DMAE, /* dump DMAE memories (0/1) */
+       DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */
+       DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */
+       DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */
+       DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */
+       DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */
+       DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */
+       DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */
+       /* preset: exclude all memories from dump (1 only) */
+       DBG_GRC_PARAM_EXCLUDE_ALL,
+       /* preset: include memories for crash dump (1 only) */
+       DBG_GRC_PARAM_CRASH,
+       /* perform dump only if MFW is responding (0/1) */
+       DBG_GRC_PARAM_PARITY_SAFE,
+       DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */
+       DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */
+       MAX_DBG_GRC_PARAMS
+};
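
[Reviewer note: the param_val/param_set_by_user arrays in struct dbg_grc_data above are indexed by this enum. A sketch of a hypothetical setter that records a value and pins it so the dump flow keeps it instead of a default; param_val is little-endian per the struct definition.]

static void example_grc_set_param(struct dbg_grc_data *grc,
				  enum dbg_grc_params param, u32 val)
{
	/* Record the value and mark it user-set so the dump flow
	 * does not replace it with its built-in default.
	 */
	grc->param_val[param] = cpu_to_le32(val);
	grc->param_set_by_user[param] = 1;
}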
+
+/* Debug reset registers */
+enum dbg_reset_regs {
+       DBG_RESET_REG_MISCS_PL_UA,
+       DBG_RESET_REG_MISCS_PL_HV,
+       DBG_RESET_REG_MISCS_PL_HV_2,
+       DBG_RESET_REG_MISC_PL_UA,
+       DBG_RESET_REG_MISC_PL_HV,
+       DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+       DBG_RESET_REG_MISC_PL_PDA_VMAIN_2,
+       DBG_RESET_REG_MISC_PL_PDA_VAUX,
+       MAX_DBG_RESET_REGS
+};
+
 /* Debug status codes */
 enum dbg_status {
        DBG_STATUS_OK,
@@ -1579,9 +2223,45 @@ enum dbg_status {
        DBG_STATUS_REG_FIFO_BAD_DATA,
        DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA,
        DBG_STATUS_DBG_ARRAY_NOT_SET,
+       DBG_STATUS_MULTI_BLOCKS_WITH_FILTER,
        MAX_DBG_STATUS
 };
 
+/* Debug Storms IDs */
+enum dbg_storms {
+       DBG_TSTORM_ID,
+       DBG_MSTORM_ID,
+       DBG_USTORM_ID,
+       DBG_XSTORM_ID,
+       DBG_YSTORM_ID,
+       DBG_PSTORM_ID,
+       MAX_DBG_STORMS
+};
+
+/* Idle Check data */
+struct idle_chk_data {
+       __le32 buf_size; /* Idle check buffer size in dwords */
+       u8 buf_size_set; /* Indicates if the idle check buffer size was set
+                         * (0/1).
+                         */
+       u8 reserved1;
+       __le16 reserved2;
+};
+
+/* Debug Tools data (per HW function) */
+struct dbg_tools_data {
+       struct dbg_grc_data grc; /* GRC Dump data */
+       struct dbg_bus_data bus; /* Debug Bus data */
+       struct idle_chk_data idle_chk; /* Idle Check data */
+       u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */
+       u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1).
+                               */
+       u8 chip_id; /* Chip ID (from enum chip_ids) */
+       u8 platform_id; /* Platform ID (from enum platform_ids) */
+       u8 initialized; /* Indicates if the data was initialized */
+       u8 reserved;
+};
+
 /********************************/
 /* HSI Init Functions constants */
 /********************************/
@@ -1589,7 +2269,41 @@ enum dbg_status {
 /* Number of VLAN priorities */
 #define NUM_OF_VLAN_PRIORITIES 8
 
-/* QM per-port init parameters */
+struct init_brb_ram_req {
+       __le32 guranteed_per_tc;
+       __le32 headroom_per_tc;
+       __le32 min_pkt_size;
+       __le32 max_ports_per_engine;
+       u8 num_active_tcs[MAX_NUM_PORTS];
+};
+
+struct init_ets_tc_req {
+       u8 use_sp;
+       u8 use_wfq;
+       __le16 weight;
+};
+
+struct init_ets_req {
+       __le32 mtu;
+       struct init_ets_tc_req tc_req[NUM_OF_TCS];
+};
+
+struct init_nig_lb_rl_req {
+       __le16 lb_mac_rate;
+       __le16 lb_rate;
+       __le32 mtu;
+       __le16 tc_rate[NUM_OF_PHYS_TCS];
+};
+
+struct init_nig_pri_tc_map_entry {
+       u8 tc_id;
+       u8 valid;
+};
+
+struct init_nig_pri_tc_map_req {
+       struct init_nig_pri_tc_map_entry pri[NUM_OF_VLAN_PRIORITIES];
+};
+
 struct init_qm_port_params {
        u8 active;
        u8 active_phys_tcs;
@@ -1619,7 +2333,7 @@ struct init_qm_vport_params {
 
 /* Width of GRC address in bits (addresses are specified in dwords) */
 #define GRC_ADDR_BITS  23
-#define MAX_GRC_ADDR   ((1 << GRC_ADDR_BITS) - 1)
+#define MAX_GRC_ADDR   (BIT(GRC_ADDR_BITS) - 1)
 
 /* indicates an init that should be applied to any phase ID */
 #define ANY_PHASE_ID   0xffff
@@ -1627,15 +2341,50 @@ struct init_qm_vport_params {
 /* Max size in dwords of a zipped array */
 #define MAX_ZIPPED_SIZE        8192
 
+struct fw_asserts_ram_section {
+       __le16 section_ram_line_offset;
+       __le16 section_ram_line_size;
+       u8 list_dword_offset;
+       u8 list_element_dword_size;
+       u8 list_num_elements;
+       u8 list_next_index_dword_offset;
+};
+
+struct fw_ver_num {
+       u8 major; /* Firmware major version number */
+       u8 minor; /* Firmware minor version number */
+       u8 rev; /* Firmware revision version number */
+       u8 eng; /* Firmware engineering version number (for bootleg versions) */
+};
+
+struct fw_ver_info {
+       __le16 tools_ver; /* Tools version number */
+       u8 image_id; /* FW image ID (e.g. main) */
+       u8 reserved1;
+       struct fw_ver_num num; /* FW version number */
+       __le32 timestamp; /* FW Timestamp in unix time  (sec. since 1970) */
+       __le32 reserved2;
+};
+
+struct fw_info {
+       struct fw_ver_info ver;
+       struct fw_asserts_ram_section fw_asserts_section;
+};
+
+struct fw_info_location {
+       __le32 grc_addr;
+       __le32 size;
+};
+
 enum init_modes {
        MODE_RESERVED,
        MODE_BB_B0,
-       MODE_RESERVED2,
+       MODE_K2,
        MODE_ASIC,
+       MODE_RESERVED2,
        MODE_RESERVED3,
        MODE_RESERVED4,
        MODE_RESERVED5,
-       MODE_RESERVED6,
        MODE_SF,
        MODE_MF_SD,
        MODE_MF_SI,
@@ -1644,7 +2393,7 @@ enum init_modes {
        MODE_PORTS_PER_ENG_4,
        MODE_100G,
        MODE_40G,
-       MODE_RESERVED7,
+       MODE_RESERVED6,
        MAX_INIT_MODES
 };
 
@@ -1674,11 +2423,11 @@ struct bin_buffer_hdr {
 
 /* binary init buffer types */
 enum bin_init_buffer_type {
-       BIN_BUF_FW_VER_INFO,
+       BIN_BUF_INIT_FW_VER_INFO,
        BIN_BUF_INIT_CMD,
        BIN_BUF_INIT_VAL,
        BIN_BUF_INIT_MODE_TREE,
-       BIN_BUF_IRO,
+       BIN_BUF_INIT_IRO,
        MAX_BIN_INIT_BUFFER_TYPE
 };
 
@@ -1902,8 +2651,276 @@ struct iro {
        __le16 size;
 };
 
+/***************************** Public Functions *******************************/
 /**
- * @brief qed_dbg_print_attn - Prints attention registers values in the specified results struct.
+ * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug
+ *     arrays.
+ *
+ * @param bin_ptr - a pointer to the binary data with debug arrays.
+ */
+enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr);
+/**
+ * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for
+ *     GRC Dump.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump
+ *     data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                             struct qed_ptt *p_ptt,
+                                             u32 *buf_size);
+/**
+ * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the collected GRC data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified dump buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+                                struct qed_ptt *p_ptt,
+                                u32 *dump_buf,
+                                u32 buf_size_in_dwords,
+                                u32 *num_dumped_dwords);
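
[Reviewer note: a sketch, via a hypothetical caller, of the two-step flow this API documents: query the required dword count, allocate, then dump. All sizes in this interface are in dwords.]

static int example_grc_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
	u32 size_dwords = 0, dumped = 0;
	enum dbg_status st;
	u32 *buf;
	int rc = 0;

	st = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, &size_dwords);
	if (st != DBG_STATUS_OK)
		return -EINVAL;

	buf = vzalloc((size_t)size_dwords * sizeof(u32));
	if (!buf)
		return -ENOMEM;

	st = qed_dbg_grc_dump(p_hwfn, p_ptt, buf, size_dwords, &dumped);
	if (st != DBG_STATUS_OK)
		rc = -EINVAL;

	/* ... on success, hand the first 'dumped' dwords to the consumer ... */
	vfree(buf);
	return rc;
}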
+/**
+ * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size
+ *     for idle check results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the idle check
+ *     data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size);
+/**
+ * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results
+ *     into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the idle check data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size
+ *     for mcp trace results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the trace data in MCP scratchpad contains an invalid signature
+ *     - the bundle ID in NVRAM is invalid
+ *     - the trace meta data cannot be found (in NVRAM or image file)
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                   struct qed_ptt *p_ptt,
+                                                   u32 *buf_size);
+/**
+ * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results
+ *     into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the mcp trace data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ *     - the trace data in MCP scratchpad contains an invalid signature
+ *     - the bundle ID in NVRAM is invalid
+ *     - the trace meta data cannot be found (in NVRAM or image file)
+ *     - the trace meta data cannot be read (from NVRAM or image file)
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+                                      struct qed_ptt *p_ptt,
+                                      u32 *dump_buf,
+                                      u32 buf_size_in_dwords,
+                                      u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size
+ *     for reg fifo results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size);
+/**
+ * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into
+ *     the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the reg fifo data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ *     - DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size
+ *     for the IGU fifo results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo
+ *     data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                  struct qed_ptt *p_ptt,
+                                                  u32 *buf_size);
+/**
+ * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into
+ *     the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the IGU fifo data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ *     - DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+                                     struct qed_ptt *p_ptt,
+                                     u32 *dump_buf,
+                                     u32 buf_size_in_dwords,
+                                     u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required
+ *     buffer size for protection override window results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for protection
+ *     override data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status
+qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                             struct qed_ptt *p_ptt,
+                                             u32 *buf_size);
+/**
+ * @brief qed_dbg_protection_override_dump - Reads protection override window
+ *     entries and writes the results into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the protection override data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ *     - DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+                                                struct qed_ptt *p_ptt,
+                                                u32 *dump_buf,
+                                                u32 buf_size_in_dwords,
+                                                u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer
+ *     size for FW Asserts results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+                                                    struct qed_ptt *p_ptt,
+                                                    u32 *buf_size);
+/**
+ * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results
+ *     into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the FW Asserts data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *     - the version wasn't set
+ *     - the specified buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt,
+                                       u32 *dump_buf,
+                                       u32 buf_size_in_dwords,
+                                       u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_print_attn - Prints the attention registers' values in the
+ *     specified results struct.
  *
  * @param p_hwfn
  * @param results - Pointer to the attention read results
@@ -1915,47 +2932,241 @@ struct iro {
 enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
                                   struct dbg_attn_block_result *results);
 
+/******************************** Constants **********************************/
+
 #define MAX_NAME_LEN   16
 
+/***************************** Public Functions *******************************/
+/**
+ * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with
+ *     debug arrays.
+ *
+ * @param bin_ptr - a pointer to the binary data with debug arrays.
+ */
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
+/**
+ * @brief qed_dbg_get_status_str - Returns a string for the specified status.
+ *
+ * @param status - a debug status code.
+ *
+ * @return a string for the specified status
+ */
+const char *qed_dbg_get_status_str(enum dbg_status status);
+/**
+ * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size
+ *     for idle check results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - idle check dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32  num_dumped_dwords,
+                                                 u32 *results_buf_size);
+/**
+ * @brief qed_print_idle_chk_results - Prints idle check results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - idle check dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the idle check results.
+ * @param num_errors - OUT: number of errors found in idle check.
+ * @param num_warnings - OUT: number of warnings found in idle check.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf,
+                                          u32 *num_errors,
+                                          u32 *num_warnings);
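The parsing helpers mirror the dump helpers: size the text buffer first, then render into it. A sketch for the idle check path, assuming dump_buf and num_dumped_dwords came from an earlier idle check dump and that vzalloc() and DBG_STATUS_VIRT_MEM_ALLOC_FAILED are acceptable here:

	u32 results_buf_size, num_errors, num_warnings;
	char *results_buf;

	rc = qed_get_idle_chk_results_buf_size(p_hwfn, dump_buf,
					       num_dumped_dwords,
					       &results_buf_size);
	if (rc != DBG_STATUS_OK)
		return rc;

	results_buf = vzalloc(results_buf_size);
	if (!results_buf)
		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;

	rc = qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords,
					results_buf, &num_errors,
					&num_warnings);
	/* results_buf now holds printable text; num_errors/num_warnings
	 * summarize the check. Free with vfree() when done.
	 */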
+/**
+ * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
+ *     for MCP Trace results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - MCP Trace dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                  u32 *dump_buf,
+                                                  u32 num_dumped_dwords,
+                                                  u32 *results_buf_size);
+/**
+ * @brief qed_print_mcp_trace_results - Prints MCP Trace results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - MCP Trace dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the MCP Trace results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+                                           u32 *dump_buf,
+                                           u32 num_dumped_dwords,
+                                           char *results_buf);
+/**
+ * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
+ *     for reg_fifo results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - reg fifo dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32 num_dumped_dwords,
+                                                 u32 *results_buf_size);
+/**
+ * @brief qed_print_reg_fifo_results - Prints reg fifo results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - reg fifo dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the reg fifo results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf);
+/**
+ * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size
+ *     for igu_fifo results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - IGU fifo dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                 u32 *dump_buf,
+                                                 u32 num_dumped_dwords,
+                                                 u32 *results_buf_size);
+/**
+ * @brief qed_print_igu_fifo_results - Prints IGU fifo results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - IGU fifo dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the IGU fifo results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+                                          u32 *dump_buf,
+                                          u32 num_dumped_dwords,
+                                          char *results_buf);
+/**
+ * @brief qed_get_protection_override_results_buf_size - Returns the required
+ *     buffer size for protection override results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - protection override dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status
+qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+                                            u32 *dump_buf,
+                                            u32 num_dumped_dwords,
+                                            u32 *results_buf_size);
+/**
+ * @brief qed_print_protection_override_results - Prints protection override
+ *     results.
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - protection override dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the protection override results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+                                                     u32 *dump_buf,
+                                                     u32 num_dumped_dwords,
+                                                     char *results_buf);
+/**
+ * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size
+ *     for FW Asserts results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - FW Asserts dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *     results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+                                                   u32 *dump_buf,
+                                                   u32 num_dumped_dwords,
+                                                   u32 *results_buf_size);
+/**
+ * @brief qed_print_fw_asserts_results - Prints FW Asserts results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - FW Asserts dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the FW Asserts results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+                                            u32 *dump_buf,
+                                            u32 num_dumped_dwords,
+                                            char *results_buf);
 /* Win 2 */
-#define GTT_BAR0_MAP_REG_IGU_CMD \
-       0x00f000UL
+#define GTT_BAR0_MAP_REG_IGU_CMD       0x00f000UL
 
 /* Win 3 */
-#define GTT_BAR0_MAP_REG_TSDM_RAM \
-       0x010000UL
+#define GTT_BAR0_MAP_REG_TSDM_RAM      0x010000UL
 
 /* Win 4 */
-#define GTT_BAR0_MAP_REG_MSDM_RAM \
-       0x011000UL
+#define GTT_BAR0_MAP_REG_MSDM_RAM      0x011000UL
 
 /* Win 5 */
-#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 \
-       0x012000UL
+#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 0x012000UL
 
 /* Win 6 */
-#define GTT_BAR0_MAP_REG_USDM_RAM \
-       0x013000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM      0x013000UL
 
 /* Win 7 */
-#define GTT_BAR0_MAP_REG_USDM_RAM_1024 \
-       0x014000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM_1024 0x014000UL
 
 /* Win 8 */
-#define GTT_BAR0_MAP_REG_USDM_RAM_2048 \
-       0x015000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM_2048 0x015000UL
 
 /* Win 9 */
-#define GTT_BAR0_MAP_REG_XSDM_RAM \
-       0x016000UL
+#define GTT_BAR0_MAP_REG_XSDM_RAM      0x016000UL
 
 /* Win 10 */
-#define GTT_BAR0_MAP_REG_YSDM_RAM \
-       0x017000UL
+#define GTT_BAR0_MAP_REG_YSDM_RAM      0x017000UL
 
 /* Win 11 */
-#define GTT_BAR0_MAP_REG_PSDM_RAM \
-       0x018000UL
+#define GTT_BAR0_MAP_REG_PSDM_RAM      0x018000UL
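These constants are the fixed BAR0 offsets of the GTT windows onto the per-storm SDM RAMs, so a read through a window is plain MMIO arithmetic. A minimal sketch, assuming regview is the ioremapped BAR0 (the helper itself is illustrative, not part of the driver):

/* Read one dword at byte offset 'off' inside TSDM RAM through Win 3. */
static u32 qed_rd_tsdm_example(void __iomem *regview, u32 off)
{
	return readl(regview + GTT_BAR0_MAP_REG_TSDM_RAM + off);
}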
 
 /**
  * @brief qed_qm_pf_mem_size - prepare QM ILT sizes
@@ -2003,7 +3214,7 @@ struct qed_qm_pf_rt_init_params {
        u16 num_vf_pqs;
        u8 start_vport;
        u8 num_vports;
-       u8 pf_wfq;
+       u16 pf_wfq;
        u32 pf_rl;
        struct init_qm_pq_params *pq_params;
        struct init_qm_vport_params *vport_params;
@@ -2138,6 +3349,9 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 #define        TSTORM_PORT_STAT_OFFSET(port_id) \
        (IRO[1].base + ((port_id) * IRO[1].m1))
 #define        TSTORM_PORT_STAT_SIZE                           (IRO[1].size)
+#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \
+       (IRO[2].base + ((port_id) * IRO[2].m1))
+#define TSTORM_LL2_PORT_STAT_SIZE                      (IRO[2].size)
 #define        USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \
        (IRO[3].base + ((vf_id) * IRO[3].m1))
 #define        USTORM_VF_PF_CHANNEL_READY_SIZE                 (IRO[3].size)
@@ -2153,42 +3367,90 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 #define        USTORM_COMMON_QUEUE_CONS_OFFSET(queue_zone_id) \
        (IRO[7].base + ((queue_zone_id) * IRO[7].m1))
 #define        USTORM_COMMON_QUEUE_CONS_SIZE                   (IRO[7].size)
+#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \
+       (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1))
+#define TSTORM_LL2_RX_PRODS_SIZE                       (IRO[14].size)
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \
+       (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1))
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE            (IRO[15].size)
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \
+       (IRO[16].base + ((core_rx_queue_id) * IRO[16].m1))
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE            (IRO[16].size)
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \
+       (IRO[17].base + ((core_tx_stats_id) * IRO[17].m1))
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE            (IRO[17].size)
 #define        MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
        (IRO[18].base + ((stat_counter_id) * IRO[18].m1))
 #define        MSTORM_QUEUE_STAT_SIZE                          (IRO[18].size)
 #define        MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \
        (IRO[19].base + ((queue_id) * IRO[19].m1))
 #define        MSTORM_ETH_PF_PRODS_SIZE                        (IRO[19].size)
-#define        MSTORM_TPA_TIMEOUT_US_OFFSET                    (IRO[20].base)
-#define        MSTORM_TPA_TIMEOUT_US_SIZE                      (IRO[20].size)
+#define MSTORM_ETH_VF_PRODS_OFFSET(vf_id, vf_queue_id) \
+       (IRO[20].base + ((vf_id) * IRO[20].m1) + ((vf_queue_id) * IRO[20].m2))
+#define MSTORM_ETH_VF_PRODS_SIZE                       (IRO[20].size)
+#define        MSTORM_TPA_TIMEOUT_US_OFFSET                    (IRO[21].base)
+#define        MSTORM_TPA_TIMEOUT_US_SIZE                      (IRO[21].size)
 #define        MSTORM_ETH_PF_STAT_OFFSET(pf_id) \
-       (IRO[21].base + ((pf_id) * IRO[21].m1))
+       (IRO[22].base + ((pf_id) * IRO[22].m1))
 #define        MSTORM_ETH_PF_STAT_SIZE                         (IRO[21].size)
 #define        USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
-       (IRO[22].base + ((stat_counter_id) * IRO[22].m1))
-#define        USTORM_QUEUE_STAT_SIZE                          (IRO[22].size)
+       (IRO[23].base + ((stat_counter_id) * IRO[23].m1))
+#define        USTORM_QUEUE_STAT_SIZE                          (IRO[23].size)
 #define        USTORM_ETH_PF_STAT_OFFSET(pf_id) \
-       (IRO[23].base + ((pf_id) * IRO[23].m1))
-#define        USTORM_ETH_PF_STAT_SIZE                         (IRO[23].size)
+       (IRO[24].base + ((pf_id) * IRO[24].m1))
+#define        USTORM_ETH_PF_STAT_SIZE                         (IRO[24].size)
 #define        PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
-       (IRO[24].base + ((stat_counter_id) * IRO[24].m1))
-#define        PSTORM_QUEUE_STAT_SIZE                          (IRO[24].size)
+       (IRO[25].base + ((stat_counter_id) * IRO[25].m1))
+#define        PSTORM_QUEUE_STAT_SIZE                          (IRO[25].size)
 #define        PSTORM_ETH_PF_STAT_OFFSET(pf_id) \
-       (IRO[25].base + ((pf_id) * IRO[25].m1))
-#define        PSTORM_ETH_PF_STAT_SIZE                         (IRO[25].size)
+       (IRO[26].base + ((pf_id) * IRO[26].m1))
+#define        PSTORM_ETH_PF_STAT_SIZE                         (IRO[26].size)
 #define        PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethtype) \
-       (IRO[26].base + ((ethtype) * IRO[26].m1))
-#define        PSTORM_CTL_FRAME_ETHTYPE_SIZE                   (IRO[26].size)
-#define        TSTORM_ETH_PRS_INPUT_OFFSET                     (IRO[27].base)
-#define        TSTORM_ETH_PRS_INPUT_SIZE                       (IRO[27].size)
+       (IRO[27].base + ((ethtype) * IRO[27].m1))
+#define        PSTORM_CTL_FRAME_ETHTYPE_SIZE                   (IRO[27].size)
+#define        TSTORM_ETH_PRS_INPUT_OFFSET                     (IRO[28].base)
+#define        TSTORM_ETH_PRS_INPUT_SIZE                       (IRO[28].size)
 #define        ETH_RX_RATE_LIMIT_OFFSET(pf_id) \
-       (IRO[28].base + ((pf_id) * IRO[28].m1))
-#define        ETH_RX_RATE_LIMIT_SIZE                          (IRO[28].size)
+       (IRO[29].base + ((pf_id) * IRO[29].m1))
+#define        ETH_RX_RATE_LIMIT_SIZE                          (IRO[29].size)
 #define        XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \
-       (IRO[29].base + ((queue_id) * IRO[29].m1))
-#define        XSTORM_ETH_QUEUE_ZONE_SIZE                      (IRO[29].size)
-
-static const struct iro iro_arr[46] = {
+       (IRO[30].base + ((queue_id) * IRO[30].m1))
+#define        XSTORM_ETH_QUEUE_ZONE_SIZE                      (IRO[30].size)
+#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \
+       (IRO[34].base + ((cmdq_queue_id) * IRO[34].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE                             (IRO[34].size)
+#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
+       (IRO[35].base + ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2))
+#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE                          (IRO[35].size)
+#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
+       (IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
+#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE                          (IRO[36].size)
+#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+       (IRO[37].base + ((pf_id) * IRO[37].m1))
+#define TSTORM_ISCSI_RX_STATS_SIZE                             (IRO[37].size)
+#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+       (IRO[38].base + ((pf_id) * IRO[38].m1))
+#define MSTORM_ISCSI_RX_STATS_SIZE                             (IRO[38].size)
+#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+       (IRO[39].base + ((pf_id) * IRO[39].m1))
+#define USTORM_ISCSI_RX_STATS_SIZE                             (IRO[39].size)
+#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+       (IRO[40].base + ((pf_id) * IRO[40].m1))
+#define XSTORM_ISCSI_TX_STATS_SIZE                             (IRO[40].size)
+#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+       (IRO[41].base + ((pf_id) * IRO[41].m1))
+#define YSTORM_ISCSI_TX_STATS_SIZE                             (IRO[41].size)
+#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+       (IRO[42].base + ((pf_id) * IRO[42].m1))
+#define PSTORM_ISCSI_TX_STATS_SIZE                             (IRO[42].size)
+#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
+       (IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1))
+#define PSTORM_RDMA_QUEUE_STAT_SIZE                            (IRO[45].size)
+#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
+       (IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1))
+#define TSTORM_RDMA_QUEUE_STAT_SIZE                            (IRO[46].size)
+
+static const struct iro iro_arr[47] = {
        {0x0, 0x0, 0x0, 0x0, 0x8},
        {0x4cb0, 0x78, 0x0, 0x0, 0x78},
        {0x6318, 0x20, 0x0, 0x0, 0x20},
@@ -2201,20 +3463,21 @@ static const struct iro iro_arr[46] = {
        {0x3df0, 0x0, 0x0, 0x0, 0x78},
        {0x29b0, 0x0, 0x0, 0x0, 0x78},
        {0x4c38, 0x0, 0x0, 0x0, 0x78},
-       {0x4a48, 0x0, 0x0, 0x0, 0x78},
+       {0x4990, 0x0, 0x0, 0x0, 0x78},
        {0x7e48, 0x0, 0x0, 0x0, 0x78},
        {0xa28, 0x8, 0x0, 0x0, 0x8},
        {0x60f8, 0x10, 0x0, 0x0, 0x10},
        {0xb820, 0x30, 0x0, 0x0, 0x30},
        {0x95b8, 0x30, 0x0, 0x0, 0x30},
-       {0x4c18, 0x80, 0x0, 0x0, 0x40},
+       {0x4b60, 0x80, 0x0, 0x0, 0x40},
        {0x1f8, 0x4, 0x0, 0x0, 0x4},
-       {0xc9a8, 0x0, 0x0, 0x0, 0x4},
-       {0x4c58, 0x80, 0x0, 0x0, 0x20},
+       {0x53a0, 0x80, 0x4, 0x0, 0x4},
+       {0xc8f0, 0x0, 0x0, 0x0, 0x4},
+       {0x4ba0, 0x80, 0x0, 0x0, 0x20},
        {0x8050, 0x40, 0x0, 0x0, 0x30},
        {0xe770, 0x60, 0x0, 0x0, 0x60},
        {0x2b48, 0x80, 0x0, 0x0, 0x38},
-       {0xdf88, 0x78, 0x0, 0x0, 0x78},
+       {0xf188, 0x78, 0x0, 0x0, 0x78},
        {0x1f8, 0x4, 0x0, 0x0, 0x4},
        {0xacf0, 0x0, 0x0, 0x0, 0xf0},
        {0xade0, 0x8, 0x0, 0x0, 0x8},
@@ -2226,455 +3489,457 @@ static const struct iro iro_arr[46] = {
        {0x200, 0x10, 0x8, 0x0, 0x8},
        {0xb78, 0x10, 0x8, 0x0, 0x2},
        {0xd888, 0x38, 0x0, 0x0, 0x24},
-       {0x12120, 0x10, 0x0, 0x0, 0x8},
-       {0x11b20, 0x38, 0x0, 0x0, 0x18},
+       {0x12c38, 0x10, 0x0, 0x0, 0x8},
+       {0x11aa0, 0x38, 0x0, 0x0, 0x18},
        {0xa8c0, 0x30, 0x0, 0x0, 0x10},
        {0x86f8, 0x28, 0x0, 0x0, 0x18},
-       {0xeff8, 0x10, 0x0, 0x0, 0x10},
+       {0x101f8, 0x10, 0x0, 0x0, 0x10},
        {0xdd08, 0x48, 0x0, 0x0, 0x38},
-       {0xf460, 0x20, 0x0, 0x0, 0x20},
+       {0x10660, 0x20, 0x0, 0x0, 0x20},
        {0x2b80, 0x80, 0x0, 0x0, 0x10},
        {0x5000, 0x10, 0x0, 0x0, 0x10},
 };
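Every *_OFFSET/*_SIZE macro above is an indexed lookup into this table: base is the storm RAM base address, m1/m2/m3 are per-argument strides, and size is the element size. A worked expansion with hypothetical values (not taken from any particular row above):

/* Suppose IRO[n] = { .base = 0x1000, .m1 = 0x20, .m2 = 0, .size = 0x8 }
 * and a macro pair defined as
 *	#define SOME_STAT_OFFSET(q)	(IRO[n].base + ((q) * IRO[n].m1))
 *	#define SOME_STAT_SIZE		(IRO[n].size)
 * Then queue 3 resolves to 0x1000 + 3 * 0x20 = 0x1060, and each
 * element occupies 0x8 bytes.
 */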
 
 /* Runtime array offsets */
-#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0
-#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1
-#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2
-#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3
-#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4
-#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5
-#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6
-#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7
-#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8
-#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9
-#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10
-#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11
-#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12
-#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13
-#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14
-#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15
-#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16
-#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17
-#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18
-#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19
-#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20
-#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21
-#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22
-#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23
-#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497
-#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736
-#define CAU_REG_PI_MEMORY_RT_OFFSET 2233
-#define CAU_REG_PI_MEMORY_RT_SIZE 4416
-#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649
-#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650
-#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651
-#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652
-#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653
-#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654
-#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655
-#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656
-#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657
-#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658
-#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659
-#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660
-#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661
-#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662
-#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663
-#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664
-#define SRC_REG_FIRSTFREE_RT_OFFSET 6665
-#define SRC_REG_FIRSTFREE_RT_SIZE 2
-#define SRC_REG_LASTFREE_RT_OFFSET 6667
-#define SRC_REG_LASTFREE_RT_SIZE 2
-#define SRC_REG_COUNTFREE_RT_OFFSET 6669
-#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670
-#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671
-#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672
-#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673
-#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674
-#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675
-#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676
-#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677
-#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678
-#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679
-#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680
-#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681
-#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682
-#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683
-#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684
-#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685
-#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686
-#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687
-#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688
-#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689
-#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690
-#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691
-#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692
-#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693
-#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694
-#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695
-#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696
-#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697
-#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698
-#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699
-#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700
-#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701
-#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702
-#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703
-#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704
-#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000
-#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704
-#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28705
-#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28706
-#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28707
-#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28708
-#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28709
-#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28710
-#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28711
-#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28712
-#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28713
-#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28714
-#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416
-#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29130
-#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512
-#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29642
-#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29643
-#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29644
-#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29645
-#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29646
-#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29647
-#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29648
-#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29649
-#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29650
-#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29651
-#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29652
-#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29653
-#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29654
-#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29655
-#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29656
-#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29657
-#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29658
-#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29659
-#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29660
-#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29661
-#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29662
-#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29663
-#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29664
-#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29665
-#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29666
-#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29667
-#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29668
-#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29669
-#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29670
-#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29671
-#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29672
-#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29673
-#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29674
-#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29675
-#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29676
-#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29677
-#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29678
-#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29679
-#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29680
-#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29681
-#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29682
-#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29683
-#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29684
-#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29685
-#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29686
-#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29687
-#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29688
-#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29689
-#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29690
-#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29691
-#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29692
-#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29693
-#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29694
-#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29695
-#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29696
-#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29697
-#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29698
-#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29699
-#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29700
-#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29701
-#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29702
-#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29703
-#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29704
-#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29705
-#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29706
-#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29707
-#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29708
-#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29709
-#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128
-#define QM_REG_VOQCRDLINE_RT_OFFSET 29837
-#define QM_REG_VOQCRDLINE_RT_SIZE 20
-#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29857
-#define QM_REG_VOQINITCRDLINE_RT_SIZE 20
-#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29877
-#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29878
-#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29879
-#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29880
-#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29881
-#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29882
-#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29883
-#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29884
-#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29885
-#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29886
-#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29887
-#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29888
-#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29889
-#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29890
-#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29891
-#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29892
-#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29893
-#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29894
-#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29895
-#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29896
-#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29897
-#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29898
-#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29899
-#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29900
-#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29901
-#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29902
-#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29903
-#define QM_REG_PQTX2PF_0_RT_OFFSET 29904
-#define QM_REG_PQTX2PF_1_RT_OFFSET 29905
-#define QM_REG_PQTX2PF_2_RT_OFFSET 29906
-#define QM_REG_PQTX2PF_3_RT_OFFSET 29907
-#define QM_REG_PQTX2PF_4_RT_OFFSET 29908
-#define QM_REG_PQTX2PF_5_RT_OFFSET 29909
-#define QM_REG_PQTX2PF_6_RT_OFFSET 29910
-#define QM_REG_PQTX2PF_7_RT_OFFSET 29911
-#define QM_REG_PQTX2PF_8_RT_OFFSET 29912
-#define QM_REG_PQTX2PF_9_RT_OFFSET 29913
-#define QM_REG_PQTX2PF_10_RT_OFFSET 29914
-#define QM_REG_PQTX2PF_11_RT_OFFSET 29915
-#define QM_REG_PQTX2PF_12_RT_OFFSET 29916
-#define QM_REG_PQTX2PF_13_RT_OFFSET 29917
-#define QM_REG_PQTX2PF_14_RT_OFFSET 29918
-#define QM_REG_PQTX2PF_15_RT_OFFSET 29919
-#define QM_REG_PQTX2PF_16_RT_OFFSET 29920
-#define QM_REG_PQTX2PF_17_RT_OFFSET 29921
-#define QM_REG_PQTX2PF_18_RT_OFFSET 29922
-#define QM_REG_PQTX2PF_19_RT_OFFSET 29923
-#define QM_REG_PQTX2PF_20_RT_OFFSET 29924
-#define QM_REG_PQTX2PF_21_RT_OFFSET 29925
-#define QM_REG_PQTX2PF_22_RT_OFFSET 29926
-#define QM_REG_PQTX2PF_23_RT_OFFSET 29927
-#define QM_REG_PQTX2PF_24_RT_OFFSET 29928
-#define QM_REG_PQTX2PF_25_RT_OFFSET 29929
-#define QM_REG_PQTX2PF_26_RT_OFFSET 29930
-#define QM_REG_PQTX2PF_27_RT_OFFSET 29931
-#define QM_REG_PQTX2PF_28_RT_OFFSET 29932
-#define QM_REG_PQTX2PF_29_RT_OFFSET 29933
-#define QM_REG_PQTX2PF_30_RT_OFFSET 29934
-#define QM_REG_PQTX2PF_31_RT_OFFSET 29935
-#define QM_REG_PQTX2PF_32_RT_OFFSET 29936
-#define QM_REG_PQTX2PF_33_RT_OFFSET 29937
-#define QM_REG_PQTX2PF_34_RT_OFFSET 29938
-#define QM_REG_PQTX2PF_35_RT_OFFSET 29939
-#define QM_REG_PQTX2PF_36_RT_OFFSET 29940
-#define QM_REG_PQTX2PF_37_RT_OFFSET 29941
-#define QM_REG_PQTX2PF_38_RT_OFFSET 29942
-#define QM_REG_PQTX2PF_39_RT_OFFSET 29943
-#define QM_REG_PQTX2PF_40_RT_OFFSET 29944
-#define QM_REG_PQTX2PF_41_RT_OFFSET 29945
-#define QM_REG_PQTX2PF_42_RT_OFFSET 29946
-#define QM_REG_PQTX2PF_43_RT_OFFSET 29947
-#define QM_REG_PQTX2PF_44_RT_OFFSET 29948
-#define QM_REG_PQTX2PF_45_RT_OFFSET 29949
-#define QM_REG_PQTX2PF_46_RT_OFFSET 29950
-#define QM_REG_PQTX2PF_47_RT_OFFSET 29951
-#define QM_REG_PQTX2PF_48_RT_OFFSET 29952
-#define QM_REG_PQTX2PF_49_RT_OFFSET 29953
-#define QM_REG_PQTX2PF_50_RT_OFFSET 29954
-#define QM_REG_PQTX2PF_51_RT_OFFSET 29955
-#define QM_REG_PQTX2PF_52_RT_OFFSET 29956
-#define QM_REG_PQTX2PF_53_RT_OFFSET 29957
-#define QM_REG_PQTX2PF_54_RT_OFFSET 29958
-#define QM_REG_PQTX2PF_55_RT_OFFSET 29959
-#define QM_REG_PQTX2PF_56_RT_OFFSET 29960
-#define QM_REG_PQTX2PF_57_RT_OFFSET 29961
-#define QM_REG_PQTX2PF_58_RT_OFFSET 29962
-#define QM_REG_PQTX2PF_59_RT_OFFSET 29963
-#define QM_REG_PQTX2PF_60_RT_OFFSET 29964
-#define QM_REG_PQTX2PF_61_RT_OFFSET 29965
-#define QM_REG_PQTX2PF_62_RT_OFFSET 29966
-#define QM_REG_PQTX2PF_63_RT_OFFSET 29967
-#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29968
-#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29969
-#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29970
-#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29971
-#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29972
-#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29973
-#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29974
-#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29975
-#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29976
-#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29977
-#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29978
-#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29979
-#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29980
-#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29981
-#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29982
-#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29983
-#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29984
-#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29985
-#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29986
-#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29987
-#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29988
-#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29989
-#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29990
-#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29991
-#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29992
-#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29993
-#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29994
-#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29995
-#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29996
-#define QM_REG_RLGLBLINCVAL_RT_SIZE 256
-#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30252
-#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256
-#define QM_REG_RLGLBLCRD_RT_OFFSET 30508
-#define QM_REG_RLGLBLCRD_RT_SIZE 256
-#define QM_REG_RLGLBLENABLE_RT_OFFSET 30764
-#define QM_REG_RLPFPERIOD_RT_OFFSET 30765
-#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30766
-#define QM_REG_RLPFINCVAL_RT_OFFSET 30767
-#define QM_REG_RLPFINCVAL_RT_SIZE 16
-#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30783
-#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_RLPFCRD_RT_OFFSET 30799
-#define QM_REG_RLPFCRD_RT_SIZE 16
-#define QM_REG_RLPFENABLE_RT_OFFSET 30815
-#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30816
-#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30817
-#define QM_REG_WFQPFWEIGHT_RT_SIZE 16
-#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30833
-#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_WFQPFCRD_RT_OFFSET 30849
-#define QM_REG_WFQPFCRD_RT_SIZE 160
-#define QM_REG_WFQPFENABLE_RT_OFFSET 31009
-#define QM_REG_WFQVPENABLE_RT_OFFSET 31010
-#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31011
-#define QM_REG_BASEADDRTXPQ_RT_SIZE 512
-#define QM_REG_TXPQMAP_RT_OFFSET 31523
-#define QM_REG_TXPQMAP_RT_SIZE 512
-#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32035
-#define QM_REG_WFQVPWEIGHT_RT_SIZE 512
-#define QM_REG_WFQVPCRD_RT_OFFSET 32547
-#define QM_REG_WFQVPCRD_RT_SIZE 512
-#define QM_REG_WFQVPMAP_RT_OFFSET 33059
-#define QM_REG_WFQVPMAP_RT_SIZE 512
-#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33571
-#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160
-#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33731
-#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33732
-#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33733
-#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33734
-#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33735
-#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33736
-#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33737
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33738
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33742
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33746
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33750
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33751
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33783
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33799
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33815
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33831
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16
-#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33847
-#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33848
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33849
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33850
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33851
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33852
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33853
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33854
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33855
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33856
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33857
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33858
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33859
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33860
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33861
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33862
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33863
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33864
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33865
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33866
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33867
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33868
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33869
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33870
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33871
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33872
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33873
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33874
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33875
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33876
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33877
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33878
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33879
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33880
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33881
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33882
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33883
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33884
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33885
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33886
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33887
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33888
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33889
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33890
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33891
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33892
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33893
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33894
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33895
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33896
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33897
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33898
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33899
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33900
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33901
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33902
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33903
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33904
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33905
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33906
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33907
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33908
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33909
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33910
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33911
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33912
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33913
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33914
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33915
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33916
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33917
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33918
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33919
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33920
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33921
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33922
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33923
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33924
-
-#define RUNTIME_ARRAY_SIZE 33925
+#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET       0
+#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET       1
+#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET       2
+#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET       3
+#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET       4
+#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET       5
+#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET       6
+#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET       7
+#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET       8
+#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET       9
+#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET       10
+#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET       11
+#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET       12
+#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET       13
+#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET       14
+#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET       15
+#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16
+#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET      17
+#define IGU_REG_PF_CONFIGURATION_RT_OFFSET     18
+#define IGU_REG_VF_CONFIGURATION_RT_OFFSET     19
+#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET      20
+#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET      21
+#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET   22
+#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET  23
+#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET    24
+#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET        761
+#define CAU_REG_SB_VAR_MEMORY_RT_SIZE  736
+#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET       1497
+#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736
+#define CAU_REG_PI_MEMORY_RT_OFFSET    2233
+#define CAU_REG_PI_MEMORY_RT_SIZE      4416
+#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET   6649
+#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET     6650
+#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET     6651
+#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET        6652
+#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET        6653
+#define PRS_REG_SEARCH_TCP_RT_OFFSET   6654
+#define PRS_REG_SEARCH_FCOE_RT_OFFSET  6655
+#define PRS_REG_SEARCH_ROCE_RT_OFFSET  6656
+#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET  6657
+#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET  6658
+#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET      6659
+#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET    6660
+#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET  6661
+#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET     6662
+#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET      6663
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET        6664
+#define SRC_REG_FIRSTFREE_RT_OFFSET    6665
+#define SRC_REG_FIRSTFREE_RT_SIZE      2
+#define SRC_REG_LASTFREE_RT_OFFSET     6667
+#define SRC_REG_LASTFREE_RT_SIZE       2
+#define SRC_REG_COUNTFREE_RT_OFFSET    6669
+#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET     6670
+#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET       6671
+#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET       6672
+#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673
+#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674
+#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET        6675
+#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET       6676
+#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET      6677
+#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET       6678
+#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET      6679
+#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET       6680
+#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET     6681
+#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET      6682
+#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET    6683
+#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET     6684
+#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET    6685
+#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET     6686
+#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET    6687
+#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET     6688
+#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET    6689
+#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET  6690
+#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET  6691
+#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET      6692
+#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET    6693
+#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET    6694
+#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET  6695
+#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET        6696
+#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET        6697
+#define PSWRQ2_REG_VF_BASE_RT_OFFSET   6698
+#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET       6699
+#define PSWRQ2_REG_WR_MBS0_RT_OFFSET   6700
+#define PSWRQ2_REG_RD_MBS0_RT_OFFSET   6701
+#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET     6702
+#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET     6703
+#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET        6704
+#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE  22000
+#define PGLUE_REG_B_VF_BASE_RT_OFFSET  28704
+#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET       28705
+#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET  28706
+#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET  28707
+#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET     28708
+#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET     28709
+#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET     28710
+#define TM_REG_VF_ENABLE_CONN_RT_OFFSET        28711
+#define TM_REG_PF_ENABLE_CONN_RT_OFFSET        28712
+#define TM_REG_PF_ENABLE_TASK_RT_OFFSET        28713
+#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET    28714
+#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET    28715
+#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET       28716
+#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416
+#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET       29132
+#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512
+#define QM_REG_MAXPQSIZE_0_RT_OFFSET   29644
+#define QM_REG_MAXPQSIZE_1_RT_OFFSET   29645
+#define QM_REG_MAXPQSIZE_2_RT_OFFSET   29646
+#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET      29647
+#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET      29648
+#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET      29649
+#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET      29650
+#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET      29651
+#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET      29652
+#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET      29653
+#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET      29654
+#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET      29655
+#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET      29656
+#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET     29657
+#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET     29658
+#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET     29659
+#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET     29660
+#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET     29661
+#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET     29662
+#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET     29663
+#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET     29664
+#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET     29665
+#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET     29666
+#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET     29667
+#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET     29668
+#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET     29669
+#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET     29670
+#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET     29671
+#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET     29672
+#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET     29673
+#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET     29674
+#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET     29675
+#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET     29676
+#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET     29677
+#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET     29678
+#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET     29679
+#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET     29680
+#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET     29681
+#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET     29682
+#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET     29683
+#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET     29684
+#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET     29685
+#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET     29686
+#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET     29687
+#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET     29688
+#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET     29689
+#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET     29690
+#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET     29691
+#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET     29692
+#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET     29693
+#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET     29694
+#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET     29695
+#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET     29696
+#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET     29697
+#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET     29698
+#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET     29699
+#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET     29700
+#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET     29701
+#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET     29702
+#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET     29703
+#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET     29704
+#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET     29705
+#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET     29706
+#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET     29707
+#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET     29708
+#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET     29709
+#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET     29710
+#define QM_REG_BASEADDROTHERPQ_RT_OFFSET       29711
+#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128
+#define QM_REG_VOQCRDLINE_RT_OFFSET    29839
+#define QM_REG_VOQCRDLINE_RT_SIZE      20
+#define QM_REG_VOQINITCRDLINE_RT_OFFSET        29859
+#define QM_REG_VOQINITCRDLINE_RT_SIZE  20
+#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET    29879
+#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET    29880
+#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET     29881
+#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET   29882
+#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET  29883
+#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET       29884
+#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET       29885
+#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET       29886
+#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET       29887
+#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET       29888
+#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET       29889
+#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET       29890
+#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET       29891
+#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET       29892
+#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET       29893
+#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET      29894
+#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET      29895
+#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET      29896
+#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET      29897
+#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET      29898
+#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET      29899
+#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET   29900
+#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET   29901
+#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET   29902
+#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET   29903
+#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET      29904
+#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET      29905
+#define QM_REG_PQTX2PF_0_RT_OFFSET     29906
+#define QM_REG_PQTX2PF_1_RT_OFFSET     29907
+#define QM_REG_PQTX2PF_2_RT_OFFSET     29908
+#define QM_REG_PQTX2PF_3_RT_OFFSET     29909
+#define QM_REG_PQTX2PF_4_RT_OFFSET     29910
+#define QM_REG_PQTX2PF_5_RT_OFFSET     29911
+#define QM_REG_PQTX2PF_6_RT_OFFSET     29912
+#define QM_REG_PQTX2PF_7_RT_OFFSET     29913
+#define QM_REG_PQTX2PF_8_RT_OFFSET     29914
+#define QM_REG_PQTX2PF_9_RT_OFFSET     29915
+#define QM_REG_PQTX2PF_10_RT_OFFSET    29916
+#define QM_REG_PQTX2PF_11_RT_OFFSET    29917
+#define QM_REG_PQTX2PF_12_RT_OFFSET    29918
+#define QM_REG_PQTX2PF_13_RT_OFFSET    29919
+#define QM_REG_PQTX2PF_14_RT_OFFSET    29920
+#define QM_REG_PQTX2PF_15_RT_OFFSET    29921
+#define QM_REG_PQTX2PF_16_RT_OFFSET    29922
+#define QM_REG_PQTX2PF_17_RT_OFFSET    29923
+#define QM_REG_PQTX2PF_18_RT_OFFSET    29924
+#define QM_REG_PQTX2PF_19_RT_OFFSET    29925
+#define QM_REG_PQTX2PF_20_RT_OFFSET    29926
+#define QM_REG_PQTX2PF_21_RT_OFFSET    29927
+#define QM_REG_PQTX2PF_22_RT_OFFSET    29928
+#define QM_REG_PQTX2PF_23_RT_OFFSET    29929
+#define QM_REG_PQTX2PF_24_RT_OFFSET    29930
+#define QM_REG_PQTX2PF_25_RT_OFFSET    29931
+#define QM_REG_PQTX2PF_26_RT_OFFSET    29932
+#define QM_REG_PQTX2PF_27_RT_OFFSET    29933
+#define QM_REG_PQTX2PF_28_RT_OFFSET    29934
+#define QM_REG_PQTX2PF_29_RT_OFFSET    29935
+#define QM_REG_PQTX2PF_30_RT_OFFSET    29936
+#define QM_REG_PQTX2PF_31_RT_OFFSET    29937
+#define QM_REG_PQTX2PF_32_RT_OFFSET    29938
+#define QM_REG_PQTX2PF_33_RT_OFFSET    29939
+#define QM_REG_PQTX2PF_34_RT_OFFSET    29940
+#define QM_REG_PQTX2PF_35_RT_OFFSET    29941
+#define QM_REG_PQTX2PF_36_RT_OFFSET    29942
+#define QM_REG_PQTX2PF_37_RT_OFFSET    29943
+#define QM_REG_PQTX2PF_38_RT_OFFSET    29944
+#define QM_REG_PQTX2PF_39_RT_OFFSET    29945
+#define QM_REG_PQTX2PF_40_RT_OFFSET    29946
+#define QM_REG_PQTX2PF_41_RT_OFFSET    29947
+#define QM_REG_PQTX2PF_42_RT_OFFSET    29948
+#define QM_REG_PQTX2PF_43_RT_OFFSET    29949
+#define QM_REG_PQTX2PF_44_RT_OFFSET    29950
+#define QM_REG_PQTX2PF_45_RT_OFFSET    29951
+#define QM_REG_PQTX2PF_46_RT_OFFSET    29952
+#define QM_REG_PQTX2PF_47_RT_OFFSET    29953
+#define QM_REG_PQTX2PF_48_RT_OFFSET    29954
+#define QM_REG_PQTX2PF_49_RT_OFFSET    29955
+#define QM_REG_PQTX2PF_50_RT_OFFSET    29956
+#define QM_REG_PQTX2PF_51_RT_OFFSET    29957
+#define QM_REG_PQTX2PF_52_RT_OFFSET    29958
+#define QM_REG_PQTX2PF_53_RT_OFFSET    29959
+#define QM_REG_PQTX2PF_54_RT_OFFSET    29960
+#define QM_REG_PQTX2PF_55_RT_OFFSET    29961
+#define QM_REG_PQTX2PF_56_RT_OFFSET    29962
+#define QM_REG_PQTX2PF_57_RT_OFFSET    29963
+#define QM_REG_PQTX2PF_58_RT_OFFSET    29964
+#define QM_REG_PQTX2PF_59_RT_OFFSET    29965
+#define QM_REG_PQTX2PF_60_RT_OFFSET    29966
+#define QM_REG_PQTX2PF_61_RT_OFFSET    29967
+#define QM_REG_PQTX2PF_62_RT_OFFSET    29968
+#define QM_REG_PQTX2PF_63_RT_OFFSET    29969
+#define QM_REG_PQOTHER2PF_0_RT_OFFSET  29970
+#define QM_REG_PQOTHER2PF_1_RT_OFFSET  29971
+#define QM_REG_PQOTHER2PF_2_RT_OFFSET  29972
+#define QM_REG_PQOTHER2PF_3_RT_OFFSET  29973
+#define QM_REG_PQOTHER2PF_4_RT_OFFSET  29974
+#define QM_REG_PQOTHER2PF_5_RT_OFFSET  29975
+#define QM_REG_PQOTHER2PF_6_RT_OFFSET  29976
+#define QM_REG_PQOTHER2PF_7_RT_OFFSET  29977
+#define QM_REG_PQOTHER2PF_8_RT_OFFSET  29978
+#define QM_REG_PQOTHER2PF_9_RT_OFFSET  29979
+#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29980
+#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29981
+#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29982
+#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29983
+#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29984
+#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29985
+#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET        29986
+#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET        29987
+#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET   29988
+#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET   29989
+#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET     29990
+#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET     29991
+#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET     29992
+#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET     29993
+#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET     29994
+#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET     29995
+#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET     29996
+#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET     29997
+#define QM_REG_RLGLBLINCVAL_RT_OFFSET  29998
+#define QM_REG_RLGLBLINCVAL_RT_SIZE    256
+#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET      30254
+#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE        256
+#define QM_REG_RLGLBLCRD_RT_OFFSET     30510
+#define QM_REG_RLGLBLCRD_RT_SIZE       256
+#define QM_REG_RLGLBLENABLE_RT_OFFSET  30766
+#define QM_REG_RLPFPERIOD_RT_OFFSET    30767
+#define QM_REG_RLPFPERIODTIMER_RT_OFFSET       30768
+#define QM_REG_RLPFINCVAL_RT_OFFSET    30769
+#define QM_REG_RLPFINCVAL_RT_SIZE      16
+#define QM_REG_RLPFUPPERBOUND_RT_OFFSET        30785
+#define QM_REG_RLPFUPPERBOUND_RT_SIZE  16
+#define QM_REG_RLPFCRD_RT_OFFSET       30801
+#define QM_REG_RLPFCRD_RT_SIZE 16
+#define QM_REG_RLPFENABLE_RT_OFFSET    30817
+#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30818
+#define QM_REG_WFQPFWEIGHT_RT_OFFSET   30819
+#define QM_REG_WFQPFWEIGHT_RT_SIZE     16
+#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET       30835
+#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16
+#define QM_REG_WFQPFCRD_RT_OFFSET      30851
+#define QM_REG_WFQPFCRD_RT_SIZE        160
+#define QM_REG_WFQPFENABLE_RT_OFFSET   31011
+#define QM_REG_WFQVPENABLE_RT_OFFSET   31012
+#define QM_REG_BASEADDRTXPQ_RT_OFFSET  31013
+#define QM_REG_BASEADDRTXPQ_RT_SIZE    512
+#define QM_REG_TXPQMAP_RT_OFFSET       31525
+#define QM_REG_TXPQMAP_RT_SIZE 512
+#define QM_REG_WFQVPWEIGHT_RT_OFFSET   32037
+#define QM_REG_WFQVPWEIGHT_RT_SIZE     512
+#define QM_REG_WFQVPCRD_RT_OFFSET      32549
+#define QM_REG_WFQVPCRD_RT_SIZE        512
+#define QM_REG_WFQVPMAP_RT_OFFSET      33061
+#define QM_REG_WFQVPMAP_RT_SIZE        512
+#define QM_REG_WFQPFCRD_MSB_RT_OFFSET  33573
+#define QM_REG_WFQPFCRD_MSB_RT_SIZE    160
+#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET      33733
+#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET        33734
+#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET        33735
+#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET        33736
+#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET        33737
+#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33738
+#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET     33739
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET      33740
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE        4
+#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33744
+#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE   4
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET   33748
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE     4
+#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET      33752
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET        33753
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE  32
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET   33785
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE     16
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33801
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE   16
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET        33817
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE  16
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET      33833
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE        16
+#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33849
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET       33850
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET      33851
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET      33852
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET      33853
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET  33854
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET  33855
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET  33856
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET  33857
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET       33858
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET       33859
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET       33860
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET       33861
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET   33862
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET        33863
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET      33864
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33865
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET       33866
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET  33867
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET   33868
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET       33869
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET  33870
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET   33871
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET       33872
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET  33873
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET   33874
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET       33875
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET  33876
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET   33877
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET       33878
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET  33879
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET   33880
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET       33881
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET  33882
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET   33883
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET       33884
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET  33885
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET   33886
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET       33887
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET  33888
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET   33889
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET       33890
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET  33891
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET   33892
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET       33893
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET  33894
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET   33895
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET      33896
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33897
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET  33898
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET      33899
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33900
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET  33901
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET      33902
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33903
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET  33904
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET      33905
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33906
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET  33907
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET      33908
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33909
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET  33910
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET      33911
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33912
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET  33913
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET      33914
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33915
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET  33916
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET      33917
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33918
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET  33919
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET      33920
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33921
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET  33922
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET      33923
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33924
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET  33925
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET   33926
+
+#define RUNTIME_ARRAY_SIZE 33927
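The *_RT_OFFSET constants above index one flat runtime-init array of RUNTIME_ARRAY_SIZE dwords; a constant that comes with a matching *_RT_SIZE occupies that many consecutive entries starting at its offset. A minimal sketch of that layout, with hypothetical helper names rather than the driver's actual API:

        /* Illustrative only: store a single value, or a block of values,
         * at a given *_RT_OFFSET in the runtime array.
         */
        static u32 rt_data[RUNTIME_ARRAY_SIZE];

        static void rt_store(u32 rt_offset, u32 val)
        {
                rt_data[rt_offset] = val;
        }

        static void rt_store_block(u32 rt_offset, const u32 *vals, u32 size)
        {
                u32 i;

                for (i = 0; i < size; i++)      /* size bounded by *_RT_SIZE */
                        rt_data[rt_offset + i] = vals[i];
        }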
 
 /* The eth storm context for the Tstorm */
 struct tstorm_eth_conn_st_ctx {
@@ -3201,7 +4466,31 @@ struct eth_conn_context {
        struct mstorm_eth_conn_st_ctx mstorm_st_context;
 };
 
-/* opcodes for the event ring */
+enum eth_error_code {
+       ETH_OK = 0x00,
+       ETH_FILTERS_MAC_ADD_FAIL_FULL,
+       ETH_FILTERS_MAC_ADD_FAIL_FULL_MTT2,
+       ETH_FILTERS_MAC_ADD_FAIL_DUP_MTT2,
+       ETH_FILTERS_MAC_ADD_FAIL_DUP_STT2,
+       ETH_FILTERS_MAC_DEL_FAIL_NOF,
+       ETH_FILTERS_MAC_DEL_FAIL_NOF_MTT2,
+       ETH_FILTERS_MAC_DEL_FAIL_NOF_STT2,
+       ETH_FILTERS_MAC_ADD_FAIL_ZERO_MAC,
+       ETH_FILTERS_VLAN_ADD_FAIL_FULL,
+       ETH_FILTERS_VLAN_ADD_FAIL_DUP,
+       ETH_FILTERS_VLAN_DEL_FAIL_NOF,
+       ETH_FILTERS_VLAN_DEL_FAIL_NOF_TT1,
+       ETH_FILTERS_PAIR_ADD_FAIL_DUP,
+       ETH_FILTERS_PAIR_ADD_FAIL_FULL,
+       ETH_FILTERS_PAIR_ADD_FAIL_FULL_MAC,
+       ETH_FILTERS_PAIR_DEL_FAIL_NOF,
+       ETH_FILTERS_PAIR_DEL_FAIL_NOF_TT1,
+       ETH_FILTERS_PAIR_ADD_FAIL_ZERO_MAC,
+       ETH_FILTERS_VNI_ADD_FAIL_FULL,
+       ETH_FILTERS_VNI_ADD_FAIL_DUP,
+       MAX_ETH_ERROR_CODE
+};
+
 enum eth_event_opcode {
        ETH_EVENT_UNUSED,
        ETH_EVENT_VPORT_START,
@@ -3269,7 +4558,13 @@ enum eth_filter_type {
        MAX_ETH_FILTER_TYPE
 };
 
-/* Ethernet Ramrod Command IDs */
+enum eth_ipv4_frag_type {
+       ETH_IPV4_NOT_FRAG,
+       ETH_IPV4_FIRST_FRAG,
+       ETH_IPV4_NON_FIRST_FRAG,
+       MAX_ETH_IPV4_FRAG_TYPE
+};
+
 enum eth_ramrod_cmd_id {
        ETH_RAMROD_UNUSED,
        ETH_RAMROD_VPORT_START,
@@ -3451,8 +4746,8 @@ struct rx_queue_start_ramrod_data {
        u8 toggle_val;
 
        u8 vf_rx_prod_index;
-
-       u8 reserved[6];
+       u8 vf_rx_prod_use_zone_a;
+       u8 reserved[5];
        __le16 reserved1;
        struct regpair cqe_pbl_addr;
        struct regpair bd_base;
@@ -3526,10 +4821,11 @@ struct tx_queue_start_ramrod_data {
        __le16 pxp_st_index;
        __le16 comp_agg_size;
        __le16 queue_zone_id;
-       __le16 test_dup_count;
+       __le16 reserved2;
        __le16 pbl_size;
        __le16 tx_queue_id;
-
+       __le16 same_as_last_id;
+       __le16 reserved[3];
        struct regpair pbl_base_addr;
        struct regpair bd_cons_address;
 };
@@ -4926,8 +6222,8 @@ struct roce_create_qp_resp_ramrod_data {
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG_SHIFT             5
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_MASK  0x1
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_SHIFT 6
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_MASK            0x1
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_SHIFT           7
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_MASK   0x1
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_SHIFT  7
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK                  0x7
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT                 8
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_MASK    0x1F
@@ -4988,6 +6284,10 @@ enum roce_event_opcode {
        MAX_ROCE_EVENT_OPCODE
 };
 
+struct roce_init_func_ramrod_data {
+       struct rdma_init_func_ramrod_data rdma;
+};
+
 struct roce_modify_qp_req_ramrod_data {
        __le16 flags;
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG_MASK      0x1
@@ -6639,6 +7939,35 @@ struct ystorm_iscsi_conn_ag_ctx {
        __le32 reg2;
        __le32 reg3;
 };
+
+#define MFW_TRACE_SIGNATURE     0x25071946
+
+/* The trace in the buffer */
+#define MFW_TRACE_EVENTID_MASK          0x00ffff
+#define MFW_TRACE_PRM_SIZE_MASK         0x0f0000
+#define MFW_TRACE_PRM_SIZE_SHIFT        16
+#define MFW_TRACE_ENTRY_SIZE            3
+
+struct mcp_trace {
+       u32 signature;          /* Helps to identify that the trace is valid */
+       u32 size;               /* The size of the trace buffer in bytes */
+       u32 curr_level;         /* 2 - everything is written to the buffer
+                                * 1 - debug traces are not written
+                                * 0 - only errors are written to the buffer
+                                */
+       u32 modules_mask[2];    /* A bit per module: 1 means write it, 0 means
+                                * mask it.
+                                */
+
+       /* Warning: the following pointers are assumed to be 32 bits as they
+        * are used only in the MFW.
+        */
+       u32 trace_prod; /* The next trace will be written to this offset */
+       u32 trace_oldest; /* The oldest valid trace starts at this offset
+                          * (usually just after the current producer).
+                          */
+};
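Given the producer and oldest offsets above, trace_buffer behaves as a cyclic buffer of variable-size entries. A minimal decoding sketch, assuming (as the masks above suggest) that each entry starts with a little-endian header word carrying the event id in its low 16 bits and the parameter count in bits 16-19; the helper name is illustrative:

        /* Hypothetical helper, not the driver's actual parser: split one
         * trace entry header into its event id and parameter count.
         */
        static void mcp_trace_decode_hdr(u32 header, u16 *event_id,
                                         u8 *num_params)
        {
                *event_id = header & MFW_TRACE_EVENTID_MASK;
                *num_params = (header & MFW_TRACE_PRM_SIZE_MASK) >>
                              MFW_TRACE_PRM_SIZE_SHIFT;
        }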
+
 #define VF_MAX_STATIC 192
 
 #define MCP_GLOB_PATH_MAX      2
@@ -6646,6 +7975,7 @@ struct ystorm_iscsi_conn_ag_ctx {
 #define MCP_GLOB_PORT_MAX      4
 #define MCP_GLOB_FUNC_MAX      16
 
+typedef u32 offsize_t;         /* In DWORDS !!! */
 /* Offset from the beginning of the MCP scratchpad */
 #define OFFSIZE_OFFSET_SHIFT   0
 #define OFFSIZE_OFFSET_MASK    0x0000ffff
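An offsize_t packs a section's location (and, in the full definition, its size) into a single dword, with the offset counted in dwords rather than bytes. A small illustrative helper under that assumption:

        /* Hypothetical helper: byte offset of a section within the MCP
         * scratchpad. The packed offset is in dwords, hence the << 2.
         */
        static u32 qed_offsize_to_bytes(offsize_t offsize)
        {
                return ((offsize & OFFSIZE_OFFSET_MASK) >>
                        OFFSIZE_OFFSET_SHIFT) << 2;
        }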
@@ -7236,8 +8566,19 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_NIG_DRAIN                 0x30000000
 #define DRV_MSG_CODE_VF_DISABLED_DONE          0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX               0xc0010000
+#define DRV_MSG_CODE_NVM_GET_FILE_ATT          0x00030000
+#define DRV_MSG_CODE_NVM_READ_NVRAM            0x00050000
 #define DRV_MSG_CODE_MCP_RESET                 0x00090000
 #define DRV_MSG_CODE_SET_VERSION               0x000f0000
+#define DRV_MSG_CODE_MCP_HALT                   0x00100000
+
+#define DRV_MSG_CODE_GET_STATS                  0x00130000
+#define DRV_MSG_CODE_STATS_TYPE_LAN             1
+#define DRV_MSG_CODE_STATS_TYPE_FCOE            2
+#define DRV_MSG_CODE_STATS_TYPE_ISCSI           3
+#define DRV_MSG_CODE_STATS_TYPE_RDMA            4
+
+#define DRV_MSG_CODE_MASK_PARITIES              0x001a0000
 
 #define DRV_MSG_CODE_BIST_TEST                 0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE              0x00200000
@@ -7248,6 +8589,9 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_UNLOAD_WOL_MCP            0x00000001
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK          0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT         3
+
+#define DRV_MB_PARAM_NVM_LEN_SHIFT             24
+
 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT   0
 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK    0x000000FF
 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT  8
@@ -7285,6 +8629,8 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION                0x20130000
 #define FW_MSG_CODE_DRV_UNLOAD_DONE            0x21100000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE       0xb0010000
+
+#define FW_MSG_CODE_NVM_OK                     0x00010000
 #define FW_MSG_CODE_OK                         0x00160000
 
 #define FW_MSG_SEQ_NUMBER_MASK                 0x0000ffff
@@ -7315,10 +8661,10 @@ enum MFW_DRV_MSG_TYPE {
        MFW_DRV_MSG_RESERVED4,
        MFW_DRV_MSG_BW_UPDATE,
        MFW_DRV_MSG_BW_UPDATE5,
-       MFW_DRV_MSG_BW_UPDATE6,
-       MFW_DRV_MSG_BW_UPDATE7,
-       MFW_DRV_MSG_BW_UPDATE8,
-       MFW_DRV_MSG_BW_UPDATE9,
+       MFW_DRV_MSG_GET_LAN_STATS,
+       MFW_DRV_MSG_GET_FCOE_STATS,
+       MFW_DRV_MSG_GET_ISCSI_STATS,
+       MFW_DRV_MSG_GET_RDMA_STATS,
        MFW_DRV_MSG_BW_UPDATE10,
        MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
        MFW_DRV_MSG_BW_UPDATE11,
@@ -7521,4 +8867,101 @@ struct nvm_cfg1 {
        struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX];
        struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX];
 };
+
+enum spad_sections {
+       SPAD_SECTION_TRACE,
+       SPAD_SECTION_NVM_CFG,
+       SPAD_SECTION_PUBLIC,
+       SPAD_SECTION_PRIVATE,
+       SPAD_SECTION_MAX
+};
+
+#define MCP_TRACE_SIZE          2048   /* 2kb */
+
+/* This section sits at a fixed location at the beginning of the scratchpad,
+ * to ensure that the MCP trace is not overwritten during an MFW upgrade.
+ * All the remaining data has a floating location which differs from version
+ * to version, and is pointed to by the mcp_meta_data below.
+ * Moreover, the spad_layout section is part of the MFW firmware, and is
+ * loaded with it from NVRAM in order to clear this portion.
+ */
+struct static_init {
+       u32 num_sections;
+       offsize_t sections[SPAD_SECTION_MAX];
+#define SECTION(_sec_) (*((offsize_t *)(STRUCT_OFFSET(sections[_sec_]))))
+
+       struct mcp_trace trace;
+#define MCP_TRACE_P ((struct mcp_trace *)(STRUCT_OFFSET(trace)))
+       u8 trace_buffer[MCP_TRACE_SIZE];
+#define MCP_TRACE_BUF ((u8 *)(STRUCT_OFFSET(trace_buffer)))
+       /* running_mfw has the same definition as in nvm_map.h.
+        * This bit indicates both the running dir and the running bundle.
+        * It is set once when the LIM is loaded.
+        */
+       u32 running_mfw;
+#define RUNNING_MFW (*((u32 *)(STRUCT_OFFSET(running_mfw))))
+       u32 build_time;
+#define MFW_BUILD_TIME (*((u32 *)(STRUCT_OFFSET(build_time))))
+       u32 reset_type;
+#define RESET_TYPE (*((u32 *)(STRUCT_OFFSET(reset_type))))
+       u32 mfw_secure_mode;
+#define MFW_SECURE_MODE (*((u32 *)(STRUCT_OFFSET(mfw_secure_mode))))
+       u16 pme_status_pf_bitmap;
+#define PME_STATUS_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_status_pf_bitmap))))
+       u16 pme_enable_pf_bitmap;
+#define PME_ENABLE_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_enable_pf_bitmap))))
+       u32 mim_nvm_addr;
+       u32 mim_start_addr;
+       u32 ah_pcie_link_params;
+#define AH_PCIE_LINK_PARAMS_LINK_SPEED_MASK     (0x000000ff)
+#define AH_PCIE_LINK_PARAMS_LINK_SPEED_SHIFT    (0)
+#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_MASK     (0x0000ff00)
+#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_SHIFT    (8)
+#define AH_PCIE_LINK_PARAMS_ASPM_MODE_MASK      (0x00ff0000)
+#define AH_PCIE_LINK_PARAMS_ASPM_MODE_SHIFT     (16)
+#define AH_PCIE_LINK_PARAMS_ASPM_CAP_MASK       (0xff000000)
+#define AH_PCIE_LINK_PARAMS_ASPM_CAP_SHIFT      (24)
+#define AH_PCIE_LINK_PARAMS (*((u32 *)(STRUCT_OFFSET(ah_pcie_link_params))))
+
+       u32 rsrv_persist[5];    /* Persist reserved for MFW upgrades */
+};
+
+enum nvm_image_type {
+       NVM_TYPE_TIM1 = 0x01,
+       NVM_TYPE_TIM2 = 0x02,
+       NVM_TYPE_MIM1 = 0x03,
+       NVM_TYPE_MIM2 = 0x04,
+       NVM_TYPE_MBA = 0x05,
+       NVM_TYPE_MODULES_PN = 0x06,
+       NVM_TYPE_VPD = 0x07,
+       NVM_TYPE_MFW_TRACE1 = 0x08,
+       NVM_TYPE_MFW_TRACE2 = 0x09,
+       NVM_TYPE_NVM_CFG1 = 0x0a,
+       NVM_TYPE_L2B = 0x0b,
+       NVM_TYPE_DIR1 = 0x0c,
+       NVM_TYPE_EAGLE_FW1 = 0x0d,
+       NVM_TYPE_FALCON_FW1 = 0x0e,
+       NVM_TYPE_PCIE_FW1 = 0x0f,
+       NVM_TYPE_HW_SET = 0x10,
+       NVM_TYPE_LIM = 0x11,
+       NVM_TYPE_AVS_FW1 = 0x12,
+       NVM_TYPE_DIR2 = 0x13,
+       NVM_TYPE_CCM = 0x14,
+       NVM_TYPE_EAGLE_FW2 = 0x15,
+       NVM_TYPE_FALCON_FW2 = 0x16,
+       NVM_TYPE_PCIE_FW2 = 0x17,
+       NVM_TYPE_AVS_FW2 = 0x18,
+       NVM_TYPE_INIT_HW = 0x19,
+       NVM_TYPE_DEFAULT_CFG = 0x1a,
+       NVM_TYPE_MDUMP = 0x1b,
+       NVM_TYPE_META = 0x1c,
+       NVM_TYPE_ISCSI_CFG = 0x1d,
+       NVM_TYPE_FCOE_CFG = 0x1f,
+       NVM_TYPE_ETH_PHY_FW1 = 0x20,
+       NVM_TYPE_ETH_PHY_FW2 = 0x21,
+       NVM_TYPE_MAX,
+};
+
+#define DIR_ID_1    (0)
+
 #endif
index 8ebdc79..6e4fae9 100644 (file)
@@ -482,28 +482,22 @@ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn)
 
        *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                     sizeof(u32), p_addr, GFP_KERNEL);
-       if (!*p_comp) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n");
+       if (!*p_comp)
                goto err;
-       }
 
        p_addr = &p_hwfn->dmae_info.dmae_cmd_phys_addr;
        *p_cmd = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                    sizeof(struct dmae_cmd),
                                    p_addr, GFP_KERNEL);
-       if (!*p_cmd) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct dmae_cmd'\n");
+       if (!*p_cmd)
                goto err;
-       }
 
        p_addr = &p_hwfn->dmae_info.intermediate_buffer_phys_addr;
        *p_buff = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                     sizeof(u32) * DMAE_MAX_RW_SIZE,
                                     p_addr, GFP_KERNEL);
-       if (!*p_buff) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `intermediate_buffer'\n");
+       if (!*p_buff)
                goto err;
-       }
 
        p_hwfn->dmae_info.channel = p_hwfn->rel_pf_id;
 
index b7a4b27..d567ba9 100644 (file)
@@ -460,10 +460,8 @@ int qed_init_run(struct qed_hwfn *p_hwfn,
        init_ops = cdev->fw_data->init_ops;
 
        p_hwfn->unzip_buf = kzalloc(MAX_ZIPPED_SIZE * 4, GFP_ATOMIC);
-       if (!p_hwfn->unzip_buf) {
-               DP_NOTICE(p_hwfn, "Failed to allocate unzip buffer\n");
+       if (!p_hwfn->unzip_buf)
                return -ENOMEM;
-       }
 
        for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) {
                union init_op *cmd = &init_ops[cmd_num];
@@ -534,7 +532,7 @@ int qed_init_fw_data(struct qed_dev *cdev, const u8 *data)
        /* First Dword contains metadata and should be skipped */
        buf_hdr = (struct bin_buffer_hdr *)(data + sizeof(u32));
 
-       offset = buf_hdr[BIN_BUF_FW_VER_INFO].offset;
+       offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset;
        fw->fw_ver_info = (struct fw_ver_info *)(data + offset);
 
        offset = buf_hdr[BIN_BUF_INIT_CMD].offset;
index 61ec973..2adedc6 100644 (file)
@@ -2370,10 +2370,8 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn,
 
        /* SB struct */
        p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
-       if (!p_sb) {
-               DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n");
+       if (!p_sb)
                return -ENOMEM;
-       }
 
        /* SB ring  */
        p_virt = dma_alloc_coherent(&cdev->pdev->dev,
@@ -2381,7 +2379,6 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn,
                                    &p_phys, GFP_KERNEL);
 
        if (!p_virt) {
-               DP_NOTICE(cdev, "Failed to allocate status block (attentions)\n");
                kfree(p_sb);
                return -ENOMEM;
        }
@@ -2667,17 +2664,14 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
        /* SB struct */
        p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
-       if (!p_sb) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n");
+       if (!p_sb)
                return -ENOMEM;
-       }
 
        /* SB ring  */
        p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                    SB_ALIGNED_SIZE(p_hwfn),
                                    &p_phys, GFP_KERNEL);
        if (!p_virt) {
-               DP_NOTICE(p_hwfn, "Failed to allocate status block\n");
                kfree(p_sb);
                return -ENOMEM;
        }
@@ -2959,7 +2953,6 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        u16 prev_sb_id = 0xFF;
 
        p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_KERNEL);
-
        if (!p_hwfn->hw_info.p_igu_info)
                return -ENOMEM;
 
@@ -3121,18 +3114,14 @@ int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        int rc = 0;
 
        rc = qed_int_sp_dpc_alloc(p_hwfn);
-       if (rc) {
-               DP_ERR(p_hwfn->cdev, "Failed to allocate sp dpc mem\n");
+       if (rc)
                return rc;
-       }
+
        rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt);
-       if (rc) {
-               DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n");
+       if (rc)
                return rc;
-       }
+
        rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt);
-       if (rc)
-               DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n");
 
        return rc;
 }
index c823c46..ddd410a 100644 (file)
@@ -101,6 +101,9 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 
        p_ramrod->tx_switching_en = p_params->tx_switching;
 
+       p_ramrod->ctl_frame_mac_check_en = !!p_params->check_mac;
+       p_ramrod->ctl_frame_ethtype_check_en = !!p_params->check_ethtype;
+
        /* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */
        p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev,
                                                  p_params->concrete_fid);
@@ -108,8 +111,8 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
-int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
-                      struct qed_sp_vport_start_params *p_params)
+static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
+                             struct qed_sp_vport_start_params *p_params)
 {
        if (IS_VF(p_hwfn->cdev)) {
                return qed_vf_pf_vport_start(p_hwfn, p_params->vport_id,
@@ -514,7 +517,8 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
                                u8 stats_id,
                                u16 bd_max_bytes,
                                dma_addr_t bd_chain_phys_addr,
-                               dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size)
+                               dma_addr_t cqe_pbl_addr,
+                               u16 cqe_pbl_size, bool b_use_zone_a_prod)
 {
        struct rx_queue_start_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
@@ -571,11 +575,14 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
        p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
        DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-       p_ramrod->vf_rx_prod_index = p_params->vf_qid;
-       if (p_params->vf_qid)
+       if (p_params->vf_qid || b_use_zone_a_prod) {
+               p_ramrod->vf_rx_prod_index = p_params->vf_qid;
                DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                          "Queue is meant for VF rxq[%04x]\n",
+                          "Queue%s is meant for VF rxq[%02x]\n",
+                          b_use_zone_a_prod ? " [legacy]" : "",
                           p_params->vf_qid);
+               p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod;
+       }
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -637,8 +644,7 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
                                         abs_stats_id,
                                         bd_max_bytes,
                                         bd_chain_phys_addr,
-                                        cqe_pbl_addr,
-                                        cqe_pbl_size);
+                                        cqe_pbl_addr, cqe_pbl_size, false);
 
        if (rc)
                qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
@@ -1679,6 +1685,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
                qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
                                            &info->num_vlan_filters);
                qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac);
+
+               info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi;
        }
 
        qed_fill_dev_info(cdev, &info->common);
index 0021145..e495d62 100644 (file)
@@ -102,6 +102,8 @@ struct qed_sp_vport_start_params {
        u16 opaque_fid;
        u8 vport_id;
        u16 mtu;
+       bool check_mac;
+       bool check_ethtype;
 };
 
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
@@ -213,6 +215,8 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
                            enum spq_mode comp_mode,
                            struct qed_spq_comp_cb *p_comp_data);
 
+void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
                           struct qed_sp_vport_start_params *p_params);
 
@@ -223,7 +227,8 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
                                u8 stats_id,
                                u16 bd_max_bytes,
                                dma_addr_t bd_chain_phys_addr,
-                               dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+                               dma_addr_t cqe_pbl_addr,
+                               u16 cqe_pbl_size, bool b_use_zone_a_prod);
 
 int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
                                u16  opaque_fid,
index 54976cc..b730a63 100644 (file)
@@ -588,6 +588,8 @@ static int qed_nic_stop(struct qed_dev *cdev)
                }
        }
 
+       qed_dbg_pf_exit(cdev);
+
        return rc;
 }
 
@@ -841,13 +843,13 @@ static int qed_slowpath_start(struct qed_dev *cdev,
        if (IS_PF(cdev)) {
                /* Allocate stream for unzipping */
                rc = qed_alloc_stream_mem(cdev);
-               if (rc) {
-                       DP_NOTICE(cdev, "Failed to allocate stream memory\n");
+               if (rc)
                        goto err2;
-               }
 
                /* First Dword used to differentiate between various sources */
                data = cdev->firmware->data + sizeof(u32);
+
+               qed_dbg_pf_init(cdev);
        }
 
        memset(&tunn_info, 0, sizeof(tunn_info));
@@ -1396,9 +1398,32 @@ const struct qed_common_ops qed_common_ops_pass = {
        .get_link = &qed_get_current_link,
        .drain = &qed_drain,
        .update_msglvl = &qed_init_dp,
+       .dbg_all_data = &qed_dbg_all_data,
+       .dbg_all_data_size = &qed_dbg_all_data_size,
        .chain_alloc = &qed_chain_alloc,
        .chain_free = &qed_chain_free,
        .get_coalesce = &qed_get_coalesce,
        .set_coalesce = &qed_set_coalesce,
        .set_led = &qed_set_led,
 };
+
+void qed_get_protocol_stats(struct qed_dev *cdev,
+                           enum qed_mcp_protocol_type type,
+                           union qed_mcp_protocol_stats *stats)
+{
+       struct qed_eth_stats eth_stats;
+
+       memset(stats, 0, sizeof(*stats));
+
+       switch (type) {
+       case QED_MCP_LAN_STATS:
+               qed_get_vport_stats(cdev, &eth_stats);
+               stats->lan_stats.ucast_rx_pkts = eth_stats.rx_ucast_pkts;
+               stats->lan_stats.ucast_tx_pkts = eth_stats.tx_ucast_pkts;
+               stats->lan_stats.fcs_err = -1;
+               break;
+       default:
+               DP_ERR(cdev, "Invalid protocol type = %d\n", type);
+               return;
+       }
+}
index 88b448b..7d39cb9 100644 (file)
@@ -171,7 +171,6 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return 0;
 
 err:
-       DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n");
        qed_mcp_free(p_hwfn);
        return -ENOMEM;
 }
@@ -390,6 +389,34 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
        return 0;
 }
 
+int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      u32 cmd,
+                      u32 param,
+                      u32 *o_mcp_resp,
+                      u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf)
+{
+       struct qed_mcp_mb_params mb_params;
+       union drv_union_data union_data;
+       int rc;
+
+       memset(&mb_params, 0, sizeof(mb_params));
+       mb_params.cmd = cmd;
+       mb_params.param = param;
+       mb_params.p_data_dst = &union_data;
+       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       if (rc)
+               return rc;
+
+       *o_mcp_resp = mb_params.mcp_resp;
+       *o_mcp_param = mb_params.mcp_param;
+
+       *o_txn_size = *o_mcp_param;
+       memcpy(o_buf, &union_data.raw_data, *o_txn_size);
+
+       return 0;
+}
+
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
                     struct qed_ptt *p_ptt, u32 *p_load_code)
 {
@@ -713,6 +740,48 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
        return 0;
 }
 
+static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt,
+                                       enum MFW_DRV_MSG_TYPE type)
+{
+       enum qed_mcp_protocol_type stats_type;
+       union qed_mcp_protocol_stats stats;
+       struct qed_mcp_mb_params mb_params;
+       union drv_union_data union_data;
+       u32 hsi_param;
+
+       switch (type) {
+       case MFW_DRV_MSG_GET_LAN_STATS:
+               stats_type = QED_MCP_LAN_STATS;
+               hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
+               break;
+       case MFW_DRV_MSG_GET_FCOE_STATS:
+               stats_type = QED_MCP_FCOE_STATS;
+               hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE;
+               break;
+       case MFW_DRV_MSG_GET_ISCSI_STATS:
+               stats_type = QED_MCP_ISCSI_STATS;
+               hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI;
+               break;
+       case MFW_DRV_MSG_GET_RDMA_STATS:
+               stats_type = QED_MCP_RDMA_STATS;
+               hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA;
+               break;
+       default:
+               DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type);
+               return;
+       }
+
+       qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats);
+
+       memset(&mb_params, 0, sizeof(mb_params));
+       mb_params.cmd = DRV_MSG_CODE_GET_STATS;
+       mb_params.param = hsi_param;
+       memcpy(&union_data, &stats, sizeof(stats));
+       mb_params.p_data_src = &union_data;
+       qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+}
+
 static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
                                  struct public_func *p_shmem_info)
 {
@@ -760,34 +829,6 @@ static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
        return size;
 }
 
-int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt, u8 *p_pf)
-{
-       struct public_func shmem_info;
-       int i;
-
-       /* Find first Ethernet interface in port */
-       for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev);
-            i += p_hwfn->cdev->num_ports_in_engines) {
-               qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-                                      MCP_PF_ID_BY_REL(p_hwfn, i));
-
-               if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE)
-                       continue;
-
-               if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) ==
-                   FUNC_MF_CFG_PROTOCOL_ETHERNET) {
-                       *p_pf = (u8)i;
-                       return 0;
-               }
-       }
-
-       DP_NOTICE(p_hwfn,
-                 "Failed to find on port an ethernet interface in MF_SI mode\n");
-
-       return -EINVAL;
-}
-
 static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_mcp_function_info *p_info;
@@ -854,6 +895,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
                case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
                        qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
                        break;
+               case MFW_DRV_MSG_GET_LAN_STATS:
+               case MFW_DRV_MSG_GET_FCOE_STATS:
+               case MFW_DRV_MSG_GET_ISCSI_STATS:
+               case MFW_DRV_MSG_GET_RDMA_STATS:
+                       qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i);
+                       break;
                case MFW_DRV_MSG_BW_UPDATE:
                        qed_mcp_update_bw(p_hwfn, p_ptt);
                        break;
@@ -1149,6 +1196,33 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
+int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       u32 resp = 0, param = 0;
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
+                        &param);
+       if (rc)
+               DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+       return rc;
+}
+
+int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       u32 value, cpu_mode;
+
+       qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
+
+       value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+       value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+       qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
+       cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+
+       return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+}
+
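qed_mcp_halt() asks the MFW to halt itself through the mailbox, while qed_mcp_resume() clears the SOFT_HALT bit directly and re-reads MCP_REG_CPU_MODE to confirm the core actually left the halted state. A hedged sketch of how a caller might bracket a critical operation between the two:

        /* Illustrative pairing only (assumes the caller holds a valid PTT) */
        if (!qed_mcp_halt(p_hwfn, p_ptt)) {
                /* ... operate while the MCP is quiesced ... */
                if (qed_mcp_resume(p_hwfn, p_ptt))
                        DP_NOTICE(p_hwfn, "Failed to resume the MCP\n");
        }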
 int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
@@ -1176,6 +1250,27 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
+int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u32 mask_parities)
+{
+       u32 resp = 0, param = 0;
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
+                        mask_parities, &resp, &param);
+
+       if (rc) {
+               DP_ERR(p_hwfn,
+                      "MCP response failure for mask parities, aborting\n");
+       } else if (resp != FW_MSG_CODE_OK) {
+               DP_ERR(p_hwfn,
+                      "MCP did not acknowledge mask parity request. Old MFW?\n");
+               rc = -EINVAL;
+       }
+
+       return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 drv_mb_param = 0, rsp, param;
index 013d1b9..dff520e 100644 (file)
@@ -106,6 +106,47 @@ struct qed_mcp_drv_version {
        u8      name[MCP_DRV_VER_STR_SIZE - 4];
 };
 
+struct qed_mcp_lan_stats {
+       u64 ucast_rx_pkts;
+       u64 ucast_tx_pkts;
+       u32 fcs_err;
+};
+
+struct qed_mcp_fcoe_stats {
+       u64 rx_pkts;
+       u64 tx_pkts;
+       u32 fcs_err;
+       u32 login_failure;
+};
+
+struct qed_mcp_iscsi_stats {
+       u64 rx_pdus;
+       u64 tx_pdus;
+       u64 rx_bytes;
+       u64 tx_bytes;
+};
+
+struct qed_mcp_rdma_stats {
+       u64 rx_pkts;
+       u64 tx_pkts;
+       u64 rx_bytes;
+       u64 tx_bytes;
+};
+
+enum qed_mcp_protocol_type {
+       QED_MCP_LAN_STATS,
+       QED_MCP_FCOE_STATS,
+       QED_MCP_ISCSI_STATS,
+       QED_MCP_RDMA_STATS
+};
+
+union qed_mcp_protocol_stats {
+       struct qed_mcp_lan_stats lan_stats;
+       struct qed_mcp_fcoe_stats fcoe_stats;
+       struct qed_mcp_iscsi_stats iscsi_stats;
+       struct qed_mcp_rdma_stats rdma_stats;
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -426,6 +467,29 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 int qed_mcp_reset(struct qed_hwfn *p_hwfn,
                  struct qed_ptt *p_ptt);
 
+/**
+ * @brief - Sends an NVM read command request to the MFW to get
+ *        a buffer.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
+ *            DRV_MSG_CODE_NVM_READ_NVRAM
+ * @param param - [0:23] - Offset, [24:31] - Size
+ * @param o_mcp_resp - MCP response
+ * @param o_mcp_param - MCP response param
+ * @param o_txn_size - Buffer size output
+ * @param o_buf - Pointer to the buffer returned by the MFW.
+ *
+ * @return 0 upon success.
+ */
+int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+                      struct qed_ptt *p_ptt,
+                      u32 cmd,
+                      u32 param,
+                      u32 *o_mcp_resp,
+                      u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
+
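A hedged usage sketch: the param word packs the NVM offset into bits [0:23] and the read length into bits [24:31] (see DRV_MB_PARAM_NVM_LEN_SHIFT above). The 'addr', 'len' and buffer size below are hypothetical, and the success check is simplified to the constants visible in this header:

        /* Illustrative caller: read 'len' bytes (len <= 0xff, and small
         * enough for one mailbox transaction) from NVM offset 'addr'.
         */
        u32 resp, param, txn_size;
        u32 buf[32];    /* assumed large enough for one transaction */
        int rc;

        rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_READ_NVRAM,
                                addr | (len << DRV_MB_PARAM_NVM_LEN_SHIFT),
                                &resp, &param, &txn_size, buf);
        if (!rc && (resp & ~FW_MSG_SEQ_NUMBER_MASK) == FW_MSG_CODE_NVM_OK) {
                /* txn_size bytes of NVM data are now in buf */
        }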
 /**
  * @brief indicates whether the MFW objects [under mcp_info] are accessible
  *
@@ -448,6 +512,26 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn);
 int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
                           struct qed_ptt *p_ptt, u8 vf_id, u8 num);
 
+/**
+ * @brief - Halt the MCP.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return 0 upon success.
+ */
+int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief - Wake up the MCP.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return 0 upon success.
+ */
+int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw);
 int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw);
 int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn,
@@ -459,6 +543,7 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
                                     struct qed_mcp_link_state *p_link,
                                     u8 min_bw);
 
-int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
-                         struct qed_ptt *p_ptt, u8 *p_pf);
+int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u32 mask_parities);
+
 #endif
index f6b86ca..759cb04 100644 (file)
        0x1009c4UL
 #define  QM_REG_PF_EN \
        0x2f2ea4UL
+#define TCFC_REG_WEAK_ENABLE_VF \
+       0x2d0704UL
 #define  TCFC_REG_STRONG_ENABLE_PF \
        0x2d0708UL
+#define  TCFC_REG_STRONG_ENABLE_VF \
+       0x2d070cUL
+#define CCFC_REG_WEAK_ENABLE_VF \
+       0x2e0704UL
 #define  CCFC_REG_STRONG_ENABLE_PF \
        0x2e0708UL
 #define  PGLUE_B_REG_PGL_ADDR_88_F0 \
 
 #define QM_REG_WFQPFWEIGHT     0x2f4e80UL
 #define QM_REG_WFQVPWEIGHT     0x2fa000UL
+
+#define PGLCS_REG_DBG_SELECT \
+       0x001d14UL
+#define PGLCS_REG_DBG_DWORD_ENABLE \
+       0x001d18UL
+#define PGLCS_REG_DBG_SHIFT \
+       0x001d1cUL
+#define PGLCS_REG_DBG_FORCE_VALID \
+       0x001d20UL
+#define PGLCS_REG_DBG_FORCE_FRAME \
+       0x001d24UL
+#define MISC_REG_RESET_PL_PDA_VMAIN_1 \
+       0x008070UL
+#define MISC_REG_RESET_PL_PDA_VMAIN_2 \
+       0x008080UL
+#define MISC_REG_RESET_PL_PDA_VAUX \
+       0x008090UL
+#define MISCS_REG_RESET_PL_UA \
+       0x009050UL
+#define MISCS_REG_RESET_PL_HV \
+       0x009060UL
+#define MISCS_REG_RESET_PL_HV_2        \
+       0x009150UL
+#define DMAE_REG_DBG_SELECT \
+       0x00c510UL
+#define DMAE_REG_DBG_DWORD_ENABLE \
+       0x00c514UL
+#define DMAE_REG_DBG_SHIFT \
+       0x00c518UL
+#define DMAE_REG_DBG_FORCE_VALID \
+       0x00c51cUL
+#define DMAE_REG_DBG_FORCE_FRAME \
+       0x00c520UL
+#define NCSI_REG_DBG_SELECT \
+       0x040474UL
+#define NCSI_REG_DBG_DWORD_ENABLE \
+       0x040478UL
+#define NCSI_REG_DBG_SHIFT \
+       0x04047cUL
+#define NCSI_REG_DBG_FORCE_VALID \
+       0x040480UL
+#define NCSI_REG_DBG_FORCE_FRAME \
+       0x040484UL
+#define GRC_REG_DBG_SELECT \
+       0x0500a4UL
+#define GRC_REG_DBG_DWORD_ENABLE \
+       0x0500a8UL
+#define GRC_REG_DBG_SHIFT \
+       0x0500acUL
+#define GRC_REG_DBG_FORCE_VALID        \
+       0x0500b0UL
+#define GRC_REG_DBG_FORCE_FRAME        \
+       0x0500b4UL
+#define UMAC_REG_DBG_SELECT \
+       0x051094UL
+#define UMAC_REG_DBG_DWORD_ENABLE \
+       0x051098UL
+#define UMAC_REG_DBG_SHIFT \
+       0x05109cUL
+#define UMAC_REG_DBG_FORCE_VALID \
+       0x0510a0UL
+#define UMAC_REG_DBG_FORCE_FRAME \
+       0x0510a4UL
+#define MCP2_REG_DBG_SELECT \
+       0x052400UL
+#define MCP2_REG_DBG_DWORD_ENABLE \
+       0x052404UL
+#define MCP2_REG_DBG_SHIFT \
+       0x052408UL
+#define MCP2_REG_DBG_FORCE_VALID \
+       0x052440UL
+#define MCP2_REG_DBG_FORCE_FRAME \
+       0x052444UL
+#define PCIE_REG_DBG_SELECT \
+       0x0547e8UL
+#define PCIE_REG_DBG_DWORD_ENABLE \
+       0x0547ecUL
+#define PCIE_REG_DBG_SHIFT \
+       0x0547f0UL
+#define PCIE_REG_DBG_FORCE_VALID \
+       0x0547f4UL
+#define PCIE_REG_DBG_FORCE_FRAME \
+       0x0547f8UL
+#define DORQ_REG_DBG_SELECT \
+       0x100ad0UL
+#define DORQ_REG_DBG_DWORD_ENABLE \
+       0x100ad4UL
+#define DORQ_REG_DBG_SHIFT \
+       0x100ad8UL
+#define DORQ_REG_DBG_FORCE_VALID \
+       0x100adcUL
+#define DORQ_REG_DBG_FORCE_FRAME \
+       0x100ae0UL
+#define IGU_REG_DBG_SELECT \
+       0x181578UL
+#define IGU_REG_DBG_DWORD_ENABLE \
+       0x18157cUL
+#define IGU_REG_DBG_SHIFT \
+       0x181580UL
+#define IGU_REG_DBG_FORCE_VALID        \
+       0x181584UL
+#define IGU_REG_DBG_FORCE_FRAME        \
+       0x181588UL
+#define CAU_REG_DBG_SELECT \
+       0x1c0ea8UL
+#define CAU_REG_DBG_DWORD_ENABLE \
+       0x1c0eacUL
+#define CAU_REG_DBG_SHIFT \
+       0x1c0eb0UL
+#define CAU_REG_DBG_FORCE_VALID        \
+       0x1c0eb4UL
+#define CAU_REG_DBG_FORCE_FRAME        \
+       0x1c0eb8UL
+#define PRS_REG_DBG_SELECT \
+       0x1f0b6cUL
+#define PRS_REG_DBG_DWORD_ENABLE \
+       0x1f0b70UL
+#define PRS_REG_DBG_SHIFT \
+       0x1f0b74UL
+#define PRS_REG_DBG_FORCE_VALID        \
+       0x1f0ba0UL
+#define PRS_REG_DBG_FORCE_FRAME        \
+       0x1f0ba4UL
+#define CNIG_REG_DBG_SELECT_K2 \
+       0x218254UL
+#define CNIG_REG_DBG_DWORD_ENABLE_K2 \
+       0x218258UL
+#define CNIG_REG_DBG_SHIFT_K2 \
+       0x21825cUL
+#define CNIG_REG_DBG_FORCE_VALID_K2 \
+       0x218260UL
+#define CNIG_REG_DBG_FORCE_FRAME_K2 \
+       0x218264UL
+#define PRM_REG_DBG_SELECT \
+       0x2306a8UL
+#define PRM_REG_DBG_DWORD_ENABLE \
+       0x2306acUL
+#define PRM_REG_DBG_SHIFT \
+       0x2306b0UL
+#define PRM_REG_DBG_FORCE_VALID        \
+       0x2306b4UL
+#define PRM_REG_DBG_FORCE_FRAME        \
+       0x2306b8UL
+#define SRC_REG_DBG_SELECT \
+       0x238700UL
+#define SRC_REG_DBG_DWORD_ENABLE \
+       0x238704UL
+#define SRC_REG_DBG_SHIFT \
+       0x238708UL
+#define SRC_REG_DBG_FORCE_VALID        \
+       0x23870cUL
+#define SRC_REG_DBG_FORCE_FRAME        \
+       0x238710UL
+#define RSS_REG_DBG_SELECT \
+       0x238c4cUL
+#define RSS_REG_DBG_DWORD_ENABLE \
+       0x238c50UL
+#define RSS_REG_DBG_SHIFT \
+       0x238c54UL
+#define RSS_REG_DBG_FORCE_VALID        \
+       0x238c58UL
+#define RSS_REG_DBG_FORCE_FRAME        \
+       0x238c5cUL
+#define RPB_REG_DBG_SELECT \
+       0x23c728UL
+#define RPB_REG_DBG_DWORD_ENABLE \
+       0x23c72cUL
+#define RPB_REG_DBG_SHIFT \
+       0x23c730UL
+#define RPB_REG_DBG_FORCE_VALID        \
+       0x23c734UL
+#define RPB_REG_DBG_FORCE_FRAME        \
+       0x23c738UL
+#define PSWRQ2_REG_DBG_SELECT \
+       0x240100UL
+#define PSWRQ2_REG_DBG_DWORD_ENABLE \
+       0x240104UL
+#define PSWRQ2_REG_DBG_SHIFT \
+       0x240108UL
+#define PSWRQ2_REG_DBG_FORCE_VALID \
+       0x24010cUL
+#define PSWRQ2_REG_DBG_FORCE_FRAME \
+       0x240110UL
+#define PSWRQ_REG_DBG_SELECT \
+       0x280020UL
+#define PSWRQ_REG_DBG_DWORD_ENABLE \
+       0x280024UL
+#define PSWRQ_REG_DBG_SHIFT \
+       0x280028UL
+#define PSWRQ_REG_DBG_FORCE_VALID \
+       0x28002cUL
+#define PSWRQ_REG_DBG_FORCE_FRAME \
+       0x280030UL
+#define PSWWR_REG_DBG_SELECT \
+       0x29a084UL
+#define PSWWR_REG_DBG_DWORD_ENABLE \
+       0x29a088UL
+#define PSWWR_REG_DBG_SHIFT \
+       0x29a08cUL
+#define PSWWR_REG_DBG_FORCE_VALID \
+       0x29a090UL
+#define PSWWR_REG_DBG_FORCE_FRAME \
+       0x29a094UL
+#define PSWRD_REG_DBG_SELECT \
+       0x29c040UL
+#define PSWRD_REG_DBG_DWORD_ENABLE \
+       0x29c044UL
+#define PSWRD_REG_DBG_SHIFT \
+       0x29c048UL
+#define PSWRD_REG_DBG_FORCE_VALID \
+       0x29c04cUL
+#define PSWRD_REG_DBG_FORCE_FRAME \
+       0x29c050UL
+#define PSWRD2_REG_DBG_SELECT \
+       0x29d400UL
+#define PSWRD2_REG_DBG_DWORD_ENABLE \
+       0x29d404UL
+#define PSWRD2_REG_DBG_SHIFT \
+       0x29d408UL
+#define PSWRD2_REG_DBG_FORCE_VALID \
+       0x29d40cUL
+#define PSWRD2_REG_DBG_FORCE_FRAME \
+       0x29d410UL
+#define PSWHST2_REG_DBG_SELECT \
+       0x29e058UL
+#define PSWHST2_REG_DBG_DWORD_ENABLE \
+       0x29e05cUL
+#define PSWHST2_REG_DBG_SHIFT \
+       0x29e060UL
+#define PSWHST2_REG_DBG_FORCE_VALID \
+       0x29e064UL
+#define PSWHST2_REG_DBG_FORCE_FRAME \
+       0x29e068UL
+#define PSWHST_REG_DBG_SELECT \
+       0x2a0100UL
+#define PSWHST_REG_DBG_DWORD_ENABLE \
+       0x2a0104UL
+#define PSWHST_REG_DBG_SHIFT \
+       0x2a0108UL
+#define PSWHST_REG_DBG_FORCE_VALID \
+       0x2a010cUL
+#define PSWHST_REG_DBG_FORCE_FRAME \
+       0x2a0110UL
+#define PGLUE_B_REG_DBG_SELECT \
+       0x2a8400UL
+#define PGLUE_B_REG_DBG_DWORD_ENABLE \
+       0x2a8404UL
+#define PGLUE_B_REG_DBG_SHIFT \
+       0x2a8408UL
+#define PGLUE_B_REG_DBG_FORCE_VALID \
+       0x2a840cUL
+#define PGLUE_B_REG_DBG_FORCE_FRAME \
+       0x2a8410UL
+#define TM_REG_DBG_SELECT \
+       0x2c07a8UL
+#define TM_REG_DBG_DWORD_ENABLE        \
+       0x2c07acUL
+#define TM_REG_DBG_SHIFT \
+       0x2c07b0UL
+#define TM_REG_DBG_FORCE_VALID \
+       0x2c07b4UL
+#define TM_REG_DBG_FORCE_FRAME \
+       0x2c07b8UL
+#define TCFC_REG_DBG_SELECT \
+       0x2d0500UL
+#define TCFC_REG_DBG_DWORD_ENABLE \
+       0x2d0504UL
+#define TCFC_REG_DBG_SHIFT \
+       0x2d0508UL
+#define TCFC_REG_DBG_FORCE_VALID \
+       0x2d050cUL
+#define TCFC_REG_DBG_FORCE_FRAME \
+       0x2d0510UL
+#define CCFC_REG_DBG_SELECT \
+       0x2e0500UL
+#define CCFC_REG_DBG_DWORD_ENABLE \
+       0x2e0504UL
+#define CCFC_REG_DBG_SHIFT \
+       0x2e0508UL
+#define CCFC_REG_DBG_FORCE_VALID \
+       0x2e050cUL
+#define CCFC_REG_DBG_FORCE_FRAME \
+       0x2e0510UL
+#define QM_REG_DBG_SELECT \
+       0x2f2e74UL
+#define QM_REG_DBG_DWORD_ENABLE        \
+       0x2f2e78UL
+#define QM_REG_DBG_SHIFT \
+       0x2f2e7cUL
+#define QM_REG_DBG_FORCE_VALID \
+       0x2f2e80UL
+#define QM_REG_DBG_FORCE_FRAME \
+       0x2f2e84UL
+#define RDIF_REG_DBG_SELECT \
+       0x300500UL
+#define RDIF_REG_DBG_DWORD_ENABLE \
+       0x300504UL
+#define RDIF_REG_DBG_SHIFT \
+       0x300508UL
+#define RDIF_REG_DBG_FORCE_VALID \
+       0x30050cUL
+#define RDIF_REG_DBG_FORCE_FRAME \
+       0x300510UL
+#define TDIF_REG_DBG_SELECT \
+       0x310500UL
+#define TDIF_REG_DBG_DWORD_ENABLE \
+       0x310504UL
+#define TDIF_REG_DBG_SHIFT \
+       0x310508UL
+#define TDIF_REG_DBG_FORCE_VALID \
+       0x31050cUL
+#define TDIF_REG_DBG_FORCE_FRAME \
+       0x310510UL
+#define BRB_REG_DBG_SELECT \
+       0x340ed0UL
+#define BRB_REG_DBG_DWORD_ENABLE \
+       0x340ed4UL
+#define BRB_REG_DBG_SHIFT \
+       0x340ed8UL
+#define BRB_REG_DBG_FORCE_VALID        \
+       0x340edcUL
+#define BRB_REG_DBG_FORCE_FRAME        \
+       0x340ee0UL
+#define XYLD_REG_DBG_SELECT \
+       0x4c1600UL
+#define XYLD_REG_DBG_DWORD_ENABLE \
+       0x4c1604UL
+#define XYLD_REG_DBG_SHIFT \
+       0x4c1608UL
+#define XYLD_REG_DBG_FORCE_VALID \
+       0x4c160cUL
+#define XYLD_REG_DBG_FORCE_FRAME \
+       0x4c1610UL
+#define YULD_REG_DBG_SELECT \
+       0x4c9600UL
+#define YULD_REG_DBG_DWORD_ENABLE \
+       0x4c9604UL
+#define YULD_REG_DBG_SHIFT \
+       0x4c9608UL
+#define YULD_REG_DBG_FORCE_VALID \
+       0x4c960cUL
+#define YULD_REG_DBG_FORCE_FRAME \
+       0x4c9610UL
+#define TMLD_REG_DBG_SELECT \
+       0x4d1600UL
+#define TMLD_REG_DBG_DWORD_ENABLE \
+       0x4d1604UL
+#define TMLD_REG_DBG_SHIFT \
+       0x4d1608UL
+#define TMLD_REG_DBG_FORCE_VALID \
+       0x4d160cUL
+#define TMLD_REG_DBG_FORCE_FRAME \
+       0x4d1610UL
+#define MULD_REG_DBG_SELECT \
+       0x4e1600UL
+#define MULD_REG_DBG_DWORD_ENABLE \
+       0x4e1604UL
+#define MULD_REG_DBG_SHIFT \
+       0x4e1608UL
+#define MULD_REG_DBG_FORCE_VALID \
+       0x4e160cUL
+#define MULD_REG_DBG_FORCE_FRAME \
+       0x4e1610UL
+#define NIG_REG_DBG_SELECT \
+       0x502140UL
+#define NIG_REG_DBG_DWORD_ENABLE \
+       0x502144UL
+#define NIG_REG_DBG_SHIFT \
+       0x502148UL
+#define NIG_REG_DBG_FORCE_VALID        \
+       0x50214cUL
+#define NIG_REG_DBG_FORCE_FRAME        \
+       0x502150UL
+#define BMB_REG_DBG_SELECT \
+       0x540a7cUL
+#define BMB_REG_DBG_DWORD_ENABLE \
+       0x540a80UL
+#define BMB_REG_DBG_SHIFT \
+       0x540a84UL
+#define BMB_REG_DBG_FORCE_VALID        \
+       0x540a88UL
+#define BMB_REG_DBG_FORCE_FRAME        \
+       0x540a8cUL
+#define PTU_REG_DBG_SELECT \
+       0x560100UL
+#define PTU_REG_DBG_DWORD_ENABLE \
+       0x560104UL
+#define PTU_REG_DBG_SHIFT \
+       0x560108UL
+#define PTU_REG_DBG_FORCE_VALID        \
+       0x56010cUL
+#define PTU_REG_DBG_FORCE_FRAME        \
+       0x560110UL
+#define CDU_REG_DBG_SELECT \
+       0x580704UL
+#define CDU_REG_DBG_DWORD_ENABLE \
+       0x580708UL
+#define CDU_REG_DBG_SHIFT \
+       0x58070cUL
+#define CDU_REG_DBG_FORCE_VALID        \
+       0x580710UL
+#define CDU_REG_DBG_FORCE_FRAME        \
+       0x580714UL
+#define WOL_REG_DBG_SELECT \
+       0x600140UL
+#define WOL_REG_DBG_DWORD_ENABLE \
+       0x600144UL
+#define WOL_REG_DBG_SHIFT \
+       0x600148UL
+#define WOL_REG_DBG_FORCE_VALID        \
+       0x60014cUL
+#define WOL_REG_DBG_FORCE_FRAME        \
+       0x600150UL
+#define BMBN_REG_DBG_SELECT \
+       0x610140UL
+#define BMBN_REG_DBG_DWORD_ENABLE \
+       0x610144UL
+#define BMBN_REG_DBG_SHIFT \
+       0x610148UL
+#define BMBN_REG_DBG_FORCE_VALID \
+       0x61014cUL
+#define BMBN_REG_DBG_FORCE_FRAME \
+       0x610150UL
+#define NWM_REG_DBG_SELECT \
+       0x8000ecUL
+#define NWM_REG_DBG_DWORD_ENABLE \
+       0x8000f0UL
+#define NWM_REG_DBG_SHIFT \
+       0x8000f4UL
+#define NWM_REG_DBG_FORCE_VALID        \
+       0x8000f8UL
+#define NWM_REG_DBG_FORCE_FRAME        \
+       0x8000fcUL
+#define PBF_REG_DBG_SELECT \
+       0xd80060UL
+#define PBF_REG_DBG_DWORD_ENABLE \
+       0xd80064UL
+#define PBF_REG_DBG_SHIFT \
+       0xd80068UL
+#define PBF_REG_DBG_FORCE_VALID        \
+       0xd8006cUL
+#define PBF_REG_DBG_FORCE_FRAME        \
+       0xd80070UL
+#define PBF_PB1_REG_DBG_SELECT \
+       0xda0728UL
+#define PBF_PB1_REG_DBG_DWORD_ENABLE \
+       0xda072cUL
+#define PBF_PB1_REG_DBG_SHIFT \
+       0xda0730UL
+#define PBF_PB1_REG_DBG_FORCE_VALID \
+       0xda0734UL
+#define PBF_PB1_REG_DBG_FORCE_FRAME \
+       0xda0738UL
+#define PBF_PB2_REG_DBG_SELECT \
+       0xda4728UL
+#define PBF_PB2_REG_DBG_DWORD_ENABLE \
+       0xda472cUL
+#define PBF_PB2_REG_DBG_SHIFT \
+       0xda4730UL
+#define PBF_PB2_REG_DBG_FORCE_VALID \
+       0xda4734UL
+#define PBF_PB2_REG_DBG_FORCE_FRAME \
+       0xda4738UL
+#define BTB_REG_DBG_SELECT \
+       0xdb08c8UL
+#define BTB_REG_DBG_DWORD_ENABLE \
+       0xdb08ccUL
+#define BTB_REG_DBG_SHIFT \
+       0xdb08d0UL
+#define BTB_REG_DBG_FORCE_VALID        \
+       0xdb08d4UL
+#define BTB_REG_DBG_FORCE_FRAME        \
+       0xdb08d8UL
+#define XSDM_REG_DBG_SELECT \
+       0xf80e28UL
+#define XSDM_REG_DBG_DWORD_ENABLE \
+       0xf80e2cUL
+#define XSDM_REG_DBG_SHIFT \
+       0xf80e30UL
+#define XSDM_REG_DBG_FORCE_VALID \
+       0xf80e34UL
+#define XSDM_REG_DBG_FORCE_FRAME \
+       0xf80e38UL
+#define YSDM_REG_DBG_SELECT \
+       0xf90e28UL
+#define YSDM_REG_DBG_DWORD_ENABLE \
+       0xf90e2cUL
+#define YSDM_REG_DBG_SHIFT \
+       0xf90e30UL
+#define YSDM_REG_DBG_FORCE_VALID \
+       0xf90e34UL
+#define YSDM_REG_DBG_FORCE_FRAME \
+       0xf90e38UL
+#define PSDM_REG_DBG_SELECT \
+       0xfa0e28UL
+#define PSDM_REG_DBG_DWORD_ENABLE \
+       0xfa0e2cUL
+#define PSDM_REG_DBG_SHIFT \
+       0xfa0e30UL
+#define PSDM_REG_DBG_FORCE_VALID \
+       0xfa0e34UL
+#define PSDM_REG_DBG_FORCE_FRAME \
+       0xfa0e38UL
+#define TSDM_REG_DBG_SELECT \
+       0xfb0e28UL
+#define TSDM_REG_DBG_DWORD_ENABLE \
+       0xfb0e2cUL
+#define TSDM_REG_DBG_SHIFT \
+       0xfb0e30UL
+#define TSDM_REG_DBG_FORCE_VALID \
+       0xfb0e34UL
+#define TSDM_REG_DBG_FORCE_FRAME \
+       0xfb0e38UL
+#define MSDM_REG_DBG_SELECT \
+       0xfc0e28UL
+#define MSDM_REG_DBG_DWORD_ENABLE \
+       0xfc0e2cUL
+#define MSDM_REG_DBG_SHIFT \
+       0xfc0e30UL
+#define MSDM_REG_DBG_FORCE_VALID \
+       0xfc0e34UL
+#define MSDM_REG_DBG_FORCE_FRAME \
+       0xfc0e38UL
+#define USDM_REG_DBG_SELECT \
+       0xfd0e28UL
+#define USDM_REG_DBG_DWORD_ENABLE \
+       0xfd0e2cUL
+#define USDM_REG_DBG_SHIFT \
+       0xfd0e30UL
+#define USDM_REG_DBG_FORCE_VALID \
+       0xfd0e34UL
+#define USDM_REG_DBG_FORCE_FRAME \
+       0xfd0e38UL
+#define XCM_REG_DBG_SELECT \
+       0x1000040UL
+#define XCM_REG_DBG_DWORD_ENABLE \
+       0x1000044UL
+#define XCM_REG_DBG_SHIFT \
+       0x1000048UL
+#define XCM_REG_DBG_FORCE_VALID        \
+       0x100004cUL
+#define XCM_REG_DBG_FORCE_FRAME        \
+       0x1000050UL
+#define YCM_REG_DBG_SELECT \
+       0x1080040UL
+#define YCM_REG_DBG_DWORD_ENABLE \
+       0x1080044UL
+#define YCM_REG_DBG_SHIFT \
+       0x1080048UL
+#define YCM_REG_DBG_FORCE_VALID        \
+       0x108004cUL
+#define YCM_REG_DBG_FORCE_FRAME        \
+       0x1080050UL
+#define PCM_REG_DBG_SELECT \
+       0x1100040UL
+#define PCM_REG_DBG_DWORD_ENABLE \
+       0x1100044UL
+#define PCM_REG_DBG_SHIFT \
+       0x1100048UL
+#define PCM_REG_DBG_FORCE_VALID        \
+       0x110004cUL
+#define PCM_REG_DBG_FORCE_FRAME        \
+       0x1100050UL
+#define TCM_REG_DBG_SELECT \
+       0x1180040UL
+#define TCM_REG_DBG_DWORD_ENABLE \
+       0x1180044UL
+#define TCM_REG_DBG_SHIFT \
+       0x1180048UL
+#define TCM_REG_DBG_FORCE_VALID        \
+       0x118004cUL
+#define TCM_REG_DBG_FORCE_FRAME        \
+       0x1180050UL
+#define MCM_REG_DBG_SELECT \
+       0x1200040UL
+#define MCM_REG_DBG_DWORD_ENABLE \
+       0x1200044UL
+#define MCM_REG_DBG_SHIFT \
+       0x1200048UL
+#define MCM_REG_DBG_FORCE_VALID        \
+       0x120004cUL
+#define MCM_REG_DBG_FORCE_FRAME        \
+       0x1200050UL
+#define UCM_REG_DBG_SELECT \
+       0x1280050UL
+#define UCM_REG_DBG_DWORD_ENABLE \
+       0x1280054UL
+#define UCM_REG_DBG_SHIFT \
+       0x1280058UL
+#define UCM_REG_DBG_FORCE_VALID        \
+       0x128005cUL
+#define UCM_REG_DBG_FORCE_FRAME        \
+       0x1280060UL
+#define XSEM_REG_DBG_SELECT \
+       0x1401528UL
+#define XSEM_REG_DBG_DWORD_ENABLE \
+       0x140152cUL
+#define XSEM_REG_DBG_SHIFT \
+       0x1401530UL
+#define XSEM_REG_DBG_FORCE_VALID \
+       0x1401534UL
+#define XSEM_REG_DBG_FORCE_FRAME \
+       0x1401538UL
+#define YSEM_REG_DBG_SELECT \
+       0x1501528UL
+#define YSEM_REG_DBG_DWORD_ENABLE \
+       0x150152cUL
+#define YSEM_REG_DBG_SHIFT \
+       0x1501530UL
+#define YSEM_REG_DBG_FORCE_VALID \
+       0x1501534UL
+#define YSEM_REG_DBG_FORCE_FRAME \
+       0x1501538UL
+#define PSEM_REG_DBG_SELECT \
+       0x1601528UL
+#define PSEM_REG_DBG_DWORD_ENABLE \
+       0x160152cUL
+#define PSEM_REG_DBG_SHIFT \
+       0x1601530UL
+#define PSEM_REG_DBG_FORCE_VALID \
+       0x1601534UL
+#define PSEM_REG_DBG_FORCE_FRAME \
+       0x1601538UL
+#define TSEM_REG_DBG_SELECT \
+       0x1701528UL
+#define TSEM_REG_DBG_DWORD_ENABLE \
+       0x170152cUL
+#define TSEM_REG_DBG_SHIFT \
+       0x1701530UL
+#define TSEM_REG_DBG_FORCE_VALID \
+       0x1701534UL
+#define TSEM_REG_DBG_FORCE_FRAME \
+       0x1701538UL
+#define MSEM_REG_DBG_SELECT \
+       0x1801528UL
+#define MSEM_REG_DBG_DWORD_ENABLE \
+       0x180152cUL
+#define MSEM_REG_DBG_SHIFT \
+       0x1801530UL
+#define MSEM_REG_DBG_FORCE_VALID \
+       0x1801534UL
+#define MSEM_REG_DBG_FORCE_FRAME \
+       0x1801538UL
+#define USEM_REG_DBG_SELECT \
+       0x1901528UL
+#define USEM_REG_DBG_DWORD_ENABLE \
+       0x190152cUL
+#define USEM_REG_DBG_SHIFT \
+       0x1901530UL
+#define USEM_REG_DBG_FORCE_VALID \
+       0x1901534UL
+#define USEM_REG_DBG_FORCE_FRAME \
+       0x1901538UL
+#define PCIE_REG_DBG_COMMON_SELECT \
+       0x054398UL
+#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \
+       0x05439cUL
+#define PCIE_REG_DBG_COMMON_SHIFT \
+       0x0543a0UL
+#define PCIE_REG_DBG_COMMON_FORCE_VALID        \
+       0x0543a4UL
+#define PCIE_REG_DBG_COMMON_FORCE_FRAME        \
+       0x0543a8UL
+#define MISC_REG_RESET_PL_UA \
+       0x008050UL
+#define MISC_REG_RESET_PL_HV \
+       0x008060UL
+#define XCM_REG_CTX_RBC_ACCS \
+       0x1001800UL
+#define XCM_REG_AGG_CON_CTX \
+       0x1001804UL
+#define XCM_REG_SM_CON_CTX \
+       0x1001808UL
+#define YCM_REG_CTX_RBC_ACCS \
+       0x1081800UL
+#define YCM_REG_AGG_CON_CTX \
+       0x1081804UL
+#define YCM_REG_AGG_TASK_CTX \
+       0x1081808UL
+#define YCM_REG_SM_CON_CTX \
+       0x108180cUL
+#define YCM_REG_SM_TASK_CTX \
+       0x1081810UL
+#define PCM_REG_CTX_RBC_ACCS \
+       0x1101440UL
+#define PCM_REG_SM_CON_CTX \
+       0x1101444UL
+#define TCM_REG_CTX_RBC_ACCS \
+       0x11814c0UL
+#define TCM_REG_AGG_CON_CTX \
+       0x11814c4UL
+#define TCM_REG_AGG_TASK_CTX \
+       0x11814c8UL
+#define TCM_REG_SM_CON_CTX \
+       0x11814ccUL
+#define TCM_REG_SM_TASK_CTX \
+       0x11814d0UL
+#define MCM_REG_CTX_RBC_ACCS \
+       0x1201800UL
+#define MCM_REG_AGG_CON_CTX \
+       0x1201804UL
+#define MCM_REG_AGG_TASK_CTX \
+       0x1201808UL
+#define MCM_REG_SM_CON_CTX \
+       0x120180cUL
+#define MCM_REG_SM_TASK_CTX \
+       0x1201810UL
+#define UCM_REG_CTX_RBC_ACCS \
+       0x1281700UL
+#define UCM_REG_AGG_CON_CTX \
+       0x1281704UL
+#define UCM_REG_AGG_TASK_CTX \
+       0x1281708UL
+#define UCM_REG_SM_CON_CTX \
+       0x128170cUL
+#define UCM_REG_SM_TASK_CTX \
+       0x1281710UL
+#define XSEM_REG_SLOW_DBG_EMPTY        \
+       0x1401140UL
+#define XSEM_REG_SYNC_DBG_EMPTY        \
+       0x1401160UL
+#define XSEM_REG_SLOW_DBG_ACTIVE \
+       0x1401400UL
+#define XSEM_REG_SLOW_DBG_MODE \
+       0x1401404UL
+#define XSEM_REG_DBG_FRAME_MODE        \
+       0x1401408UL
+#define XSEM_REG_DBG_MODE1_CFG \
+       0x1401420UL
+#define XSEM_REG_FAST_MEMORY \
+       0x1440000UL
+#define YSEM_REG_SYNC_DBG_EMPTY        \
+       0x1501160UL
+#define YSEM_REG_SLOW_DBG_ACTIVE \
+       0x1501400UL
+#define YSEM_REG_SLOW_DBG_MODE \
+       0x1501404UL
+#define YSEM_REG_DBG_FRAME_MODE        \
+       0x1501408UL
+#define YSEM_REG_DBG_MODE1_CFG \
+       0x1501420UL
+#define YSEM_REG_FAST_MEMORY \
+       0x1540000UL
+#define PSEM_REG_SLOW_DBG_EMPTY        \
+       0x1601140UL
+#define PSEM_REG_SYNC_DBG_EMPTY        \
+       0x1601160UL
+#define PSEM_REG_SLOW_DBG_ACTIVE \
+       0x1601400UL
+#define PSEM_REG_SLOW_DBG_MODE \
+       0x1601404UL
+#define PSEM_REG_DBG_FRAME_MODE        \
+       0x1601408UL
+#define PSEM_REG_DBG_MODE1_CFG \
+       0x1601420UL
+#define PSEM_REG_FAST_MEMORY \
+       0x1640000UL
+#define TSEM_REG_SLOW_DBG_EMPTY        \
+       0x1701140UL
+#define TSEM_REG_SYNC_DBG_EMPTY        \
+       0x1701160UL
+#define TSEM_REG_SLOW_DBG_ACTIVE \
+       0x1701400UL
+#define TSEM_REG_SLOW_DBG_MODE \
+       0x1701404UL
+#define TSEM_REG_DBG_FRAME_MODE        \
+       0x1701408UL
+#define TSEM_REG_DBG_MODE1_CFG \
+       0x1701420UL
+#define TSEM_REG_FAST_MEMORY \
+       0x1740000UL
+#define MSEM_REG_SLOW_DBG_EMPTY        \
+       0x1801140UL
+#define MSEM_REG_SYNC_DBG_EMPTY        \
+       0x1801160UL
+#define MSEM_REG_SLOW_DBG_ACTIVE \
+       0x1801400UL
+#define MSEM_REG_SLOW_DBG_MODE \
+       0x1801404UL
+#define MSEM_REG_DBG_FRAME_MODE        \
+       0x1801408UL
+#define MSEM_REG_DBG_MODE1_CFG \
+       0x1801420UL
+#define MSEM_REG_FAST_MEMORY \
+       0x1840000UL
+#define USEM_REG_SLOW_DBG_EMPTY        \
+       0x1901140UL
+#define USEM_REG_SYNC_DBG_EMPTY        \
+       0x1901160UL
+#define USEM_REG_SLOW_DBG_ACTIVE \
+       0x1901400UL
+#define USEM_REG_SLOW_DBG_MODE \
+       0x1901404UL
+#define USEM_REG_DBG_FRAME_MODE        \
+       0x1901408UL
+#define USEM_REG_DBG_MODE1_CFG \
+       0x1901420UL
+#define USEM_REG_FAST_MEMORY \
+       0x1940000UL
+#define SEM_FAST_REG_INT_RAM \
+       0x020000UL
+#define SEM_FAST_REG_INT_RAM_SIZE \
+       20480
+#define GRC_REG_TRACE_FIFO_VALID_DATA \
+       0x050064UL
+#define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \
+       0x05040cUL
+#define GRC_REG_PROTECTION_OVERRIDE_WINDOW \
+       0x050500UL
+#define IGU_REG_ERROR_HANDLING_MEMORY \
+       0x181520UL
+#define MCP_REG_CPU_MODE \
+       0xe05000UL
+#define MCP_REG_CPU_MODE_SOFT_HALT \
+               (0x1 << 10)
+#define BRB_REG_BIG_RAM_ADDRESS \
+       0x340800UL
+#define BRB_REG_BIG_RAM_DATA \
+       0x341500UL
+#define SEM_FAST_REG_STALL_0 \
+       0x000488UL
+#define SEM_FAST_REG_STALLED \
+       0x000494UL
+#define BTB_REG_BIG_RAM_ADDRESS \
+       0xdb0800UL
+#define BTB_REG_BIG_RAM_DATA \
+       0xdb0c00UL
+#define BMB_REG_BIG_RAM_ADDRESS \
+       0x540800UL
+#define BMB_REG_BIG_RAM_DATA \
+       0x540f00UL
+#define SEM_FAST_REG_STORM_REG_FILE \
+       0x008000UL
+#define RSS_REG_RSS_RAM_ADDR \
+       0x238c30UL
+#define MISCS_REG_BLOCK_256B_EN \
+       0x009074UL
+#define MCP_REG_SCRATCH_SIZE \
+       57344
+#define MCP_REG_CPU_REG_FILE \
+       0xe05200UL
+#define MCP_REG_CPU_REG_FILE_SIZE \
+       32
+#define DBG_REG_DEBUG_TARGET \
+       0x01005cUL
+#define DBG_REG_FULL_MODE \
+       0x010060UL
+#define DBG_REG_CALENDAR_OUT_DATA \
+       0x010480UL
+#define GRC_REG_TRACE_FIFO \
+       0x050068UL
+#define IGU_REG_ERROR_HANDLING_DATA_VALID \
+       0x181530UL
+#define DBG_REG_DBG_BLOCK_ON \
+       0x010454UL
+#define DBG_REG_FRAMING_MODE \
+       0x010058UL
+#define SEM_FAST_REG_VFC_DATA_WR \
+       0x000b40UL
+#define SEM_FAST_REG_VFC_ADDR \
+       0x000b44UL
+#define SEM_FAST_REG_VFC_DATA_RD \
+       0x000b48UL
+#define RSS_REG_RSS_RAM_DATA \
+       0x238c20UL
+#define MISC_REG_BLOCK_256B_EN \
+       0x008c14UL
+#define NWS_REG_NWS_CMU        \
+       0x720000UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \
+       0x000680UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \
+       0x000684UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \
+       0x0006c0UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \
+       0x0006c4UL
+#define MS_REG_MS_CMU \
+       0x6a4000UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \
+       0x000208UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \
+       0x000210UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \
+       0x00020cUL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \
+       0x000214UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \
+       0x000208UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \
+       0x00020cUL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \
+       0x000210UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \
+       0x000214UL
+#define PHY_PCIE_REG_PHY0 \
+       0x620000UL
+#define PHY_PCIE_REG_PHY1 \
+       0x624000UL
+
 #endif
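
The per-client debug-bus registers added above share a single layout: each block's DWORD_ENABLE, SHIFT, FORCE_VALID and FORCE_FRAME registers sit at consecutive dword offsets after its SELECT register (see e.g. XCM at 0x1000040..0x1000050 or XSEM at 0x1401528..0x1401538). A hedged sketch of a table-driven helper exploiting that regularity follows; the function name is hypothetical, and qed_wr() is assumed to be the driver's ptt-based register-write accessor.

	/* Program one debug-bus client given only its SELECT address;
	 * the companion registers follow at +0x4, +0x8, +0xc and +0x10.
	 */
	static void qed_dbg_bus_program_client(struct qed_hwfn *p_hwfn,
					       struct qed_ptt *p_ptt,
					       u32 select_addr,
					       u32 line, u32 dword_en)
	{
		qed_wr(p_hwfn, p_ptt, select_addr, line);           /* _DBG_SELECT */
		qed_wr(p_hwfn, p_ptt, select_addr + 0x4, dword_en); /* _DBG_DWORD_ENABLE */
		qed_wr(p_hwfn, p_ptt, select_addr + 0x8, 0);        /* _DBG_SHIFT */
		qed_wr(p_hwfn, p_ptt, select_addr + 0xc, 0);        /* _DBG_FORCE_VALID */
		qed_wr(p_hwfn, p_ptt, select_addr + 0x10, 0);       /* _DBG_FORCE_FRAME */
	}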
index a342bfe..9b7678f 100644 (file)
@@ -2,6 +2,7 @@
 #include "qed_dev_api.h"
 #include "qed_mcp.h"
 #include "qed_sp.h"
+#include "qed_selftest.h"
 
 int qed_selftest_memory(struct qed_dev *cdev)
 {
index 0265a32..349af18 100644 (file)
@@ -323,10 +323,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
 
        /* Allocate EQ struct */
        p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL);
-       if (!p_eq) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n");
+       if (!p_eq)
                return NULL;
-       }
 
        /* Allocate and initialize EQ chain*/
        if (qed_chain_alloc(p_hwfn->cdev,
@@ -335,10 +333,8 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
                            QED_CHAIN_CNT_TYPE_U16,
                            num_elem,
                            sizeof(union event_ring_element),
-                           &p_eq->chain)) {
-               DP_NOTICE(p_hwfn, "Failed to allocate eq chain\n");
+                           &p_eq->chain))
                goto eq_allocate_fail;
-       }
 
        /* register EQ completion on the SP SB */
        qed_int_register_cb(p_hwfn, qed_eq_completion,
@@ -451,10 +447,8 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
 
        /* SPQ struct */
        p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
-       if (!p_spq) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n");
+       if (!p_spq)
                return -ENOMEM;
-       }
 
        /* SPQ ring  */
        if (qed_chain_alloc(p_hwfn->cdev,
@@ -463,18 +457,14 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
                            QED_CHAIN_CNT_TYPE_U16,
                            0,   /* N/A when the mode is SINGLE */
                            sizeof(struct slow_path_element),
-                           &p_spq->chain)) {
-               DP_NOTICE(p_hwfn, "Failed to allocate spq chain\n");
+                           &p_spq->chain))
                goto spq_allocate_fail;
-       }
 
        /* allocate and fill the SPQ elements (incl. ramrod data list) */
        capacity = qed_chain_get_capacity(&p_spq->chain);
        p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-                                   capacity *
-                                   sizeof(struct qed_spq_entry),
+                                   capacity * sizeof(struct qed_spq_entry),
                                    &p_phys, GFP_KERNEL);
-
        if (!p_virt)
                goto spq_allocate_fail;
 
@@ -863,10 +853,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn)
 
        /* Allocate ConsQ struct */
        p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL);
-       if (!p_consq) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n");
+       if (!p_consq)
                return NULL;
-       }
 
        /* Allocate and initialize EQ chain*/
        if (qed_chain_alloc(p_hwfn->cdev,
@@ -874,10 +862,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn)
                            QED_CHAIN_MODE_PBL,
                            QED_CHAIN_CNT_TYPE_U16,
                            QED_CHAIN_PAGE_SIZE / 0x80,
-                           0x80, &p_consq->chain)) {
-               DP_NOTICE(p_hwfn, "Failed to allocate consq chain");
+                           0x80, &p_consq->chain))
                goto consq_allocate_fail;
-       }
 
        return p_consq;
 
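The qed_spq.c hunks above all make one change: allocation-failure DP_NOTICE messages are dropped, on the grounds that kzalloc() and dma_alloc_coherent() already warn on failure. A minimal sketch of the resulting pattern (function name hypothetical):

	static struct qed_eq *eq_alloc_sketch(void)
	{
		struct qed_eq *p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL);

		/* No driver-specific message: absent __GFP_NOWARN, the
		 * allocator already logged the failure.
		 */
		if (!p_eq)
			return NULL;

		return p_eq;
	}
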
index 51e4c90..a4a3cea 100644 (file)
@@ -60,7 +60,8 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf)
        }
 
        fp_minor = p_vf->acquire.vfdev_info.eth_fp_hsi_minor;
-       if (fp_minor > ETH_HSI_VER_MINOR) {
+       if (fp_minor > ETH_HSI_VER_MINOR &&
+           fp_minor != ETH_HSI_VER_NO_PKT_LEN_TUNN) {
                DP_VERBOSE(p_hwfn,
                           QED_MSG_IOV,
                           "VF [%d] - Requested fp hsi %02x.%02x which is slightly newer than PF's %02x.%02x; Configuring PFs version\n",
@@ -107,8 +108,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn,
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
-bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
-                          int rel_vf_id, bool b_enabled_only)
+static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
+                                 int rel_vf_id, bool b_enabled_only)
 {
        if (!p_hwfn->pf_iov_info) {
                DP_NOTICE(p_hwfn->cdev, "No iov info\n");
@@ -185,8 +186,8 @@ static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn,
        return false;
 }
 
-int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
-                            int vfid, struct qed_ptt *p_ptt)
+static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
+                                   int vfid, struct qed_ptt *p_ptt)
 {
        struct qed_bulletin_content *p_bulletin;
        int crc_size = sizeof(p_bulletin->crc);
@@ -454,10 +455,8 @@ int qed_iov_alloc(struct qed_hwfn *p_hwfn)
        }
 
        p_sriov = kzalloc(sizeof(*p_sriov), GFP_KERNEL);
-       if (!p_sriov) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n");
+       if (!p_sriov)
                return -ENOMEM;
-       }
 
        p_hwfn->pf_iov_info = p_sriov;
 
@@ -506,10 +505,9 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
 
        /* Allocate a new struct for IOV information */
        cdev->p_iov_info = kzalloc(sizeof(*cdev->p_iov_info), GFP_KERNEL);
-       if (!cdev->p_iov_info) {
-               DP_NOTICE(p_hwfn, "Can't support IOV due to lack of memory\n");
+       if (!cdev->p_iov_info)
                return -ENOMEM;
-       }
+
        cdev->p_iov_info->pos = pos;
 
        rc = qed_iov_pci_cfg_info(cdev);
@@ -575,7 +573,7 @@ static void qed_iov_set_vf_to_disable(struct qed_dev *cdev,
        }
 }
 
-void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable)
+static void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable)
 {
        u16 i;
 
@@ -1132,9 +1130,10 @@ static void qed_iov_prepare_resp(struct qed_hwfn *p_hwfn,
        qed_iov_send_response(p_hwfn, p_ptt, vf_info, length, status);
 }
 
-struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn,
-                                                     u16 relative_vf_id,
-                                                     bool b_enabled_only)
+static struct
+qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn,
+                                              u16 relative_vf_id,
+                                              bool b_enabled_only)
 {
        struct qed_vf_info *vf = NULL;
 
@@ -1145,7 +1144,7 @@ struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn,
        return &vf->p_vf_info;
 }
 
-void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid)
+static void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid)
 {
        struct qed_public_vf_info *vf_info;
 
@@ -1241,6 +1240,16 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn,
                           p_req->num_vlan_filters,
                           p_resp->num_vlan_filters,
                           p_req->num_mc_filters, p_resp->num_mc_filters);
+
+               /* Some legacy OSes are incapable of correctly handling this
+                * failure.
+                */
+               if ((p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+                    ETH_HSI_VER_NO_PKT_LEN_TUNN) &&
+                   (p_vf->acquire.vfdev_info.os_type ==
+                    VFPF_ACQUIRE_OS_WINDOWS))
+                       return PFVF_STATUS_SUCCESS;
+
                return PFVF_STATUS_NO_RESOURCE;
        }
 
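The Windows-specific escape above keys off the same ETH_HSI_VER_NO_PKT_LEN_TUNN minor that the rest of the patch uses to identify pre-fastpath-HSI clients. A hedged sketch of that predicate, factored as a helper (name hypothetical):

	/* True when the VF negotiated the pre-packet-length/tunnelling HSI */
	static bool qed_iov_vf_is_legacy(struct qed_vf_info *p_vf)
	{
		return p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
		       ETH_HSI_VER_NO_PKT_LEN_TUNN;
	}
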
@@ -1280,22 +1289,42 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
 
        memset(resp, 0, sizeof(*resp));
 
+       /* Write the PF version so that the VF knows which version is
+        * supported - it might be overridden later. This guarantees the
+        * VF can recognize a legacy PF by the lack of versions in the
+        * reply.
+        */
+       pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR;
+       pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR;
+
+       if (vf->state != VF_FREE && vf->state != VF_STOPPED) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_IOV,
+                          "VF[%d] sent ACQUIRE but is already in state %d - fail request\n",
+                          vf->abs_vf_id, vf->state);
+               goto out;
+       }
+
        /* Validate FW compatibility */
        if (req->vfdev_info.eth_fp_hsi_major != ETH_HSI_VER_MAJOR) {
-               DP_INFO(p_hwfn,
-                       "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
-                       vf->abs_vf_id,
-                       req->vfdev_info.eth_fp_hsi_major,
-                       req->vfdev_info.eth_fp_hsi_minor,
-                       ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR);
-
-               /* Write the PF version so that VF would know which version
-                * is supported.
-                */
-               pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR;
-               pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR;
+               if (req->vfdev_info.capabilities &
+                   VFPF_ACQUIRE_CAP_PRE_FP_HSI) {
+                       struct vf_pf_vfdev_info *p_vfdev = &req->vfdev_info;
 
-               goto out;
+                       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                                  "VF[%d] is pre-fastpath HSI\n",
+                                  vf->abs_vf_id);
+                       p_vfdev->eth_fp_hsi_major = ETH_HSI_VER_MAJOR;
+                       p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN;
+               } else {
+                       DP_INFO(p_hwfn,
+                               "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n",
+                               vf->abs_vf_id,
+                               req->vfdev_info.eth_fp_hsi_major,
+                               req->vfdev_info.eth_fp_hsi_minor,
+                               ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR);
+
+                       goto out;
+               }
        }
 
        /* On 100g PFs, prevent old VFs from loading */
@@ -1334,6 +1363,10 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
        pfdev_info->fw_minor = FW_MINOR_VERSION;
        pfdev_info->fw_rev = FW_REVISION_VERSION;
        pfdev_info->fw_eng = FW_ENGINEERING_VERSION;
+
+       /* Incorrect for legacy VFs, but that doesn't matter since legacy
+        * VFs don't read this field.
+        */
        pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR,
                                         req->vfdev_info.eth_fp_hsi_minor);
        pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX;
@@ -1645,6 +1678,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
        params.vport_id = vf->vport_id;
        params.max_buffers_per_cqe = start->max_buffers_per_cqe;
        params.mtu = vf->mtu;
+       params.check_mac = true;
 
        rc = qed_sp_eth_vport_start(p_hwfn, &params);
        if (rc) {
@@ -1690,21 +1724,32 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
 
 static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn,
                                          struct qed_ptt *p_ptt,
-                                         struct qed_vf_info *vf, u8 status)
+                                         struct qed_vf_info *vf,
+                                         u8 status, bool b_legacy)
 {
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
        struct pfvf_start_queue_resp_tlv *p_tlv;
        struct vfpf_start_rxq_tlv *req;
+       u16 length;
 
        mbx->offset = (u8 *)mbx->reply_virt;
 
+       /* Taking a bigger struct instead of adding a TLV to the list was
+        * a mistake, but one which we're now stuck with, as some older
+        * clients assume the size of the previous response.
+        */
+       if (!b_legacy)
+               length = sizeof(*p_tlv);
+       else
+               length = sizeof(struct pfvf_def_resp_tlv);
+
        p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_RXQ,
-                           sizeof(*p_tlv));
+                           length);
        qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
                    sizeof(struct channel_list_end_tlv));
 
        /* Update the TLV with the response */
-       if (status == PFVF_STATUS_SUCCESS) {
+       if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) {
                req = &mbx->req_virt->start_rxq;
                p_tlv->offset = PXP_VF_BAR0_START_MSDM_ZONE_B +
                                offsetof(struct mstorm_vf_zone,
@@ -1712,7 +1757,7 @@ static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn,
                                sizeof(struct eth_rx_prod_data) * req->rx_qid;
        }
 
-       qed_iov_send_response(p_hwfn, p_ptt, vf, sizeof(*p_tlv), status);
+       qed_iov_send_response(p_hwfn, p_ptt, vf, length, status);
 }
 
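The legacy-aware length selection above recurs verbatim in the Tx-queue response below; as a sketch, the choice could live in one helper (name hypothetical):

	static u16 qed_iov_queue_resp_len(bool b_legacy)
	{
		/* Legacy clients assume the size of the old default response */
		return b_legacy ? sizeof(struct pfvf_def_resp_tlv) :
				  sizeof(struct pfvf_start_queue_resp_tlv);
	}
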
 static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
@@ -1723,6 +1768,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
        u8 status = PFVF_STATUS_NO_RESOURCE;
        struct vfpf_start_rxq_tlv *req;
+       bool b_legacy_vf = false;
        int rc;
 
        memset(&params, 0, sizeof(params));
@@ -1738,13 +1784,27 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
        params.sb = req->hw_sb;
        params.sb_idx = req->sb_index;
 
+       /* Legacy VFs keep their producers in a different location; they
+        * calculate the address themselves and clean the producer before
+        * reaching this point.
+        */
+       if (vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+           ETH_HSI_VER_NO_PKT_LEN_TUNN) {
+               b_legacy_vf = true;
+       } else {
+               REG_WR(p_hwfn,
+                      GTT_BAR0_MAP_REG_MSDM_RAM +
+                      MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
+                      0);
+       }
+
        rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid,
                                         vf->vf_queues[req->rx_qid].fw_cid,
                                         &params,
                                         vf->abs_vf_id + 0x10,
                                         req->bd_max_bytes,
                                         req->rxq_addr,
-                                        req->cqe_pbl_addr, req->cqe_pbl_size);
+                                        req->cqe_pbl_addr, req->cqe_pbl_size,
+                                        b_legacy_vf);
 
        if (rc) {
                status = PFVF_STATUS_FAILURE;
@@ -1755,7 +1815,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
        }
 
 out:
-       qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status);
+       qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status, b_legacy_vf);
 }
 
 static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn,
@@ -1764,23 +1824,38 @@ static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn,
 {
        struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
        struct pfvf_start_queue_resp_tlv *p_tlv;
+       bool b_legacy = false;
+       u16 length;
 
        mbx->offset = (u8 *)mbx->reply_virt;
 
+       /* Taking a bigger struct instead of adding a TLV to the list was
+        * a mistake, but one which we're now stuck with, as some older
+        * clients assume the size of the previous response.
+        */
+       if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+           ETH_HSI_VER_NO_PKT_LEN_TUNN)
+               b_legacy = true;
+
+       if (!b_legacy)
+               length = sizeof(*p_tlv);
+       else
+               length = sizeof(struct pfvf_def_resp_tlv);
+
        p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_TXQ,
-                           sizeof(*p_tlv));
+                           length);
        qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
                    sizeof(struct channel_list_end_tlv));
 
        /* Update the TLV with the response */
-       if (status == PFVF_STATUS_SUCCESS) {
+       if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) {
                u16 qid = mbx->req_virt->start_txq.tx_qid;
 
                p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid,
                                            DQ_DEMS_LEGACY);
        }
 
-       qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_tlv), status);
+       qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status);
 }
 
 static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
@@ -2436,8 +2511,8 @@ qed_iov_vf_update_unicast_shadow(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
-                     int vfid, struct qed_filter_ucast *params)
+static int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
+                            int vfid, struct qed_filter_ucast *params)
 {
        struct qed_public_vf_info *vf;
 
@@ -2754,7 +2829,8 @@ cleanup:
        return rc;
 }
 
-int qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int
+qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 ack_vfs[VF_MAX_STATIC / 32];
        int rc = 0;
@@ -2941,7 +3017,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
        }
 }
 
-void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid)
+static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid)
 {
        u64 add_bit = 1ULL << (vfid % 64);
 
@@ -3064,8 +3140,8 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn,
        qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
 
-void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
-                                     u16 pvid, int vfid)
+static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
+                                            u16 pvid, int vfid)
 {
        struct qed_vf_info *vf_info;
        u64 feature;
@@ -3098,7 +3174,7 @@ static bool qed_iov_vf_has_vport_instance(struct qed_hwfn *p_hwfn, int vfid)
        return !!p_vf_info->vport_instance;
 }
 
-bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid)
+static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid)
 {
        struct qed_vf_info *p_vf_info;
 
@@ -3120,7 +3196,7 @@ static bool qed_iov_spoofchk_get(struct qed_hwfn *p_hwfn, int vfid)
        return vf_info->spoof_chk;
 }
 
-int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
+static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
 {
        struct qed_vf_info *vf;
        int rc = -EINVAL;
@@ -3163,7 +3239,8 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn,
        return p_vf->bulletin.p_virt->mac;
 }
 
-u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
+static u16
+qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
 {
        struct qed_vf_info *p_vf;
 
@@ -3195,7 +3272,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn,
        return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val);
 }
 
-int qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate)
+static int
+qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate)
 {
        struct qed_vf_info *vf;
        u8 vport_id;
@@ -3754,7 +3832,7 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn)
        qed_ptt_release(hwfn, ptt);
 }
 
-void qed_iov_pf_task(struct work_struct *work)
+static void qed_iov_pf_task(struct work_struct *work)
 {
        struct qed_hwfn *hwfn = container_of(work, struct qed_hwfn,
                                             iov_task.work);
index 9b780b3..85334ce 100644 (file)
@@ -46,6 +46,17 @@ static void *qed_vf_pf_prep(struct qed_hwfn *p_hwfn, u16 type, u16 length)
        return p_tlv;
 }
 
+static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status)
+{
+       union pfvf_tlvs *resp = p_hwfn->vf_iov_info->pf2vf_reply;
+
+       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                  "VF request status = 0x%x, PF reply status = 0x%x\n",
+                  req_status, resp->default_resp.hdr.status);
+
+       mutex_unlock(&(p_hwfn->vf_iov_info->mutex));
+}
+
 static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
 {
        union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request;
@@ -103,16 +114,12 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
                           "VF <-- PF Timeout [Type %d]\n",
                           p_req->first_tlv.tl.type);
                rc = -EBUSY;
-               goto exit;
        } else {
                DP_VERBOSE(p_hwfn, QED_MSG_IOV,
                           "PF response: %d [Type %d]\n",
                           *done, p_req->first_tlv.tl.type);
        }
 
-exit:
-       mutex_unlock(&(p_hwfn->vf_iov_info->mutex));
-
        return rc;
 }
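
With qed_vf_pf_req_end() in place, every VF-to-PF request now follows one lock-scoped lifecycle. A hedged sketch of the shape the callers below take (the TLV type, response body and status-to-errno mapping vary per request; this is illustrative, not a verbatim function from the patch):

	static int vf_pf_request_sketch(struct qed_hwfn *p_hwfn)
	{
		struct pfvf_def_resp_tlv *resp;
		struct vfpf_first_tlv *req;
		int rc;

		/* qed_vf_pf_prep() acquires the channel mutex */
		req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_INT_CLEANUP,
				     sizeof(*req));

		/* ... populate req and add the list-end TLV here ... */

		resp = &p_hwfn->vf_iov_info->pf2vf_reply->default_resp;
		rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
		if (!rc && resp->hdr.status != PFVF_STATUS_SUCCESS)
			rc = -EINVAL;

		/* Logs both statuses and releases the mutex */
		qed_vf_pf_req_end(p_hwfn, rc);
		return rc;
	}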
 
@@ -191,6 +198,9 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
                DP_VERBOSE(p_hwfn,
                           QED_MSG_IOV, "attempting to acquire resources\n");
 
+               /* Clear response buffer, as this might be a re-send */
+               memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs));
+
                /* send acquire request */
                rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
                if (rc)
@@ -205,9 +215,12 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
                        /* PF agrees to allocate our resources */
                        if (!(resp->pfdev_info.capabilities &
                              PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE)) {
-                               DP_INFO(p_hwfn,
-                                       "PF is using old incompatible driver; Either downgrade driver or request provider to update hypervisor version\n");
-                               return -EINVAL;
+                               /* It's possible a legacy PF mistakenly
+                                * accepted; we don't care - simply mark
+                                * it as legacy and continue.
+                                */
+                               req->vfdev_info.capabilities |=
+                                   VFPF_ACQUIRE_CAP_PRE_FP_HSI;
                        }
                        DP_VERBOSE(p_hwfn, QED_MSG_IOV, "resources acquired\n");
                        resources_acquired = true;
@@ -215,27 +228,55 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
                           attempts < VF_ACQUIRE_THRESH) {
                        qed_vf_pf_acquire_reduce_resc(p_hwfn, p_resc,
                                                      &resp->resc);
+               } else if (resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) {
+                       if (pfdev_info->major_fp_hsi &&
+                           (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) {
+                               DP_NOTICE(p_hwfn,
+                                         "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n",
+                                         pfdev_info->major_fp_hsi,
+                                         pfdev_info->minor_fp_hsi,
+                                         ETH_HSI_VER_MAJOR,
+                                         ETH_HSI_VER_MINOR,
+                                         pfdev_info->major_fp_hsi);
+                               rc = -EINVAL;
+                               goto exit;
+                       }
 
-                       /* Clear response buffer */
-                       memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs));
-               } else if ((resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) &&
-                          pfdev_info->major_fp_hsi &&
-                          (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) {
-                       DP_NOTICE(p_hwfn,
-                                 "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n",
-                                 pfdev_info->major_fp_hsi,
-                                 pfdev_info->minor_fp_hsi,
-                                 ETH_HSI_VER_MAJOR,
-                                 ETH_HSI_VER_MINOR, pfdev_info->major_fp_hsi);
-                       return -EINVAL;
+                       if (!pfdev_info->major_fp_hsi) {
+                               if (req->vfdev_info.capabilities &
+                                   VFPF_ACQUIRE_CAP_PRE_FP_HSI) {
+                                       DP_NOTICE(p_hwfn,
+                                                 "PF uses a very old driver. Please change to a VF driver version no later than 8.8.x.x.\n");
+                                       rc = -EINVAL;
+                                       goto exit;
+                               } else {
+                                       DP_INFO(p_hwfn,
+                                               "PF is old - try re-acquire to see if it supports FW-version override\n");
+                                       req->vfdev_info.capabilities |=
+                                           VFPF_ACQUIRE_CAP_PRE_FP_HSI;
+                                       continue;
+                               }
+                       }
+
+                       /* If PF/VF are using the same major, the PF must
+                        * have had its reasons. Simply fail.
+                        */
+                       DP_NOTICE(p_hwfn, "PF rejected acquisition by VF\n");
+                       rc = -EINVAL;
+                       goto exit;
                } else {
                        DP_ERR(p_hwfn,
                               "PF returned error %d to VF acquisition request\n",
                               resp->hdr.status);
-                       return -EAGAIN;
+                       rc = -EAGAIN;
+                       goto exit;
                }
        }
 
+       /* Mark the PF as legacy, if needed */
+       if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_PRE_FP_HSI)
+               p_iov->b_pre_fp_hsi = true;
+
        /* Update bulletin board size with response from PF */
        p_iov->bulletin.size = resp->bulletin_size;
 
@@ -253,14 +294,18 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn)
                }
        }
 
-       if (ETH_HSI_VER_MINOR &&
+       if (!p_iov->b_pre_fp_hsi &&
+           ETH_HSI_VER_MINOR &&
            (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) {
                DP_INFO(p_hwfn,
                        "PF is using older fastpath HSI; %02x.%02x is configured\n",
                        ETH_HSI_VER_MAJOR, resp->pfdev_info.minor_fp_hsi);
        }
 
-       return 0;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
+
+       return rc;
 }
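
The retry loop in the acquire path now has several distinct exits; a condensed, hedged summary of the outcomes handled above:

	/*
	 * ACQUIRE reply handling (illustrative summary):
	 *   SUCCESS + POST_FW_OVERRIDE cap     -> resources acquired
	 *   SUCCESS - POST_FW_OVERRIDE cap     -> treat PF as legacy, done
	 *   NO_RESOURCE (tries < threshold)    -> shrink request, retry
	 *   NOT_SUPPORTED, major HSI mismatch  -> fail with -EINVAL
	 *   NOT_SUPPORTED, no major, first try -> set PRE_FP_HSI, retry
	 *   NOT_SUPPORTED, no major, retried   -> fail with -EINVAL
	 *   NOT_SUPPORTED, same major          -> PF rejected, -EINVAL
	 *   anything else                      -> fail with -EAGAIN
	 */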
 
 int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
@@ -286,31 +331,23 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 
        /* Allocate vf sriov info */
        p_iov = kzalloc(sizeof(*p_iov), GFP_KERNEL);
-       if (!p_iov) {
-               DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n");
+       if (!p_iov)
                return -ENOMEM;
-       }
 
        /* Allocate vf2pf msg */
        p_iov->vf2pf_request = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                                  sizeof(union vfpf_tlvs),
                                                  &p_iov->vf2pf_request_phys,
                                                  GFP_KERNEL);
-       if (!p_iov->vf2pf_request) {
-               DP_NOTICE(p_hwfn,
-                         "Failed to allocate `vf2pf_request' DMA memory\n");
+       if (!p_iov->vf2pf_request)
                goto free_p_iov;
-       }
 
        p_iov->pf2vf_reply = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
                                                sizeof(union pfvf_tlvs),
                                                &p_iov->pf2vf_reply_phys,
                                                GFP_KERNEL);
-       if (!p_iov->pf2vf_reply) {
-               DP_NOTICE(p_hwfn,
-                         "Failed to allocate `pf2vf_reply' DMA memory\n");
+       if (!p_iov->pf2vf_reply)
                goto free_vf2pf_request;
-       }
 
        DP_VERBOSE(p_hwfn,
                   QED_MSG_IOV,
@@ -347,6 +384,9 @@ free_p_iov:
 
        return -ENOMEM;
 }
+#define TSTORM_QZONE_START   PXP_VF_BAR0_START_SDM_ZONE_A
+#define MSTORM_QZONE_START(dev)   (TSTORM_QZONE_START +        \
+                                  (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev)))
 
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
                        u8 rx_qid,
@@ -374,6 +414,21 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
        req->bd_max_bytes = bd_max_bytes;
        req->stat_id = -1;
 
+       /* If the PF is legacy, we'll need to calculate the producers
+        * ourselves, as well as clean them.
+        */
+       if (pp_prod && p_iov->b_pre_fp_hsi) {
+               u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid];
+               u32 init_prod_val = 0;
+
+               *pp_prod = (u8 __iomem *)p_hwfn->regview +
+                                        MSTORM_QZONE_START(p_hwfn->cdev) +
+                                        hw_qid * MSTORM_QZONE_SIZE;
+
+               /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
+               __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
+                                 (u32 *)(&init_prod_val));
+       }
        /* add list termination tlv */
        qed_add_tlv(p_hwfn, &p_iov->offset,
                    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
@@ -381,13 +436,15 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
        resp = &p_iov->pf2vf_reply->queue_start;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
 
        /* Learn the address of the producer from the response */
-       if (pp_prod) {
+       if (pp_prod && !p_iov->b_pre_fp_hsi) {
                u32 init_prod_val = 0;
 
                *pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset;
@@ -399,6 +456,8 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
                __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
                                  (u32 *)&init_prod_val);
        }
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
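
For a legacy PF the Rx producer address computed above reduces to a fixed formula over the acquired hw_qid. A hedged restatement as a standalone helper (name hypothetical):

	static u8 __iomem *vf_legacy_rx_prod_addr(struct qed_hwfn *p_hwfn,
						  u8 hw_qid)
	{
		/* Legacy layout: producers live in the MSTORM queue zone */
		return (u8 __iomem *)p_hwfn->regview +
		       MSTORM_QZONE_START(p_hwfn->cdev) +
		       hw_qid * MSTORM_QZONE_SIZE;
	}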
@@ -424,10 +483,15 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
        resp = &p_iov->pf2vf_reply->default_resp;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
+
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
@@ -470,13 +534,27 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
        }
 
        if (pp_doorbell) {
-               *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset;
+               /* Modern PFs provide the actual doorbell offset, while
+                * legacy PFs provided only the queue id.
+                */
+               if (!p_iov->b_pre_fp_hsi) {
+                       *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+                                                    resp->offset;
+               } else {
+                       u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id];
+                       u32 db_addr;
+
+                       db_addr = qed_db_addr(cid, DQ_DEMS_LEGACY);
+                       *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+                                                    db_addr;
+               }
 
                DP_VERBOSE(p_hwfn, QED_MSG_IOV,
                           "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
                           tx_queue_id, *pp_doorbell, resp->offset);
        }
 exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
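
The Tx side mirrors the Rx handling: a modern PF returns the doorbell offset in its response, while against a legacy PF the VF derives it from the CID. A hedged sketch of the legacy branch as a helper (name hypothetical):

	static u8 __iomem *vf_legacy_txq_doorbell(struct qed_hwfn *p_hwfn, u8 cid)
	{
		/* qed_db_addr() maps a CID to its legacy DEMS doorbell */
		return (u8 __iomem *)p_hwfn->doorbells +
		       qed_db_addr(cid, DQ_DEMS_LEGACY);
	}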
@@ -501,10 +579,15 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
        resp = &p_iov->pf2vf_reply->default_resp;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
@@ -543,10 +626,15 @@ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn,
        resp = &p_iov->pf2vf_reply->default_resp;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
+
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
@@ -567,10 +655,15 @@ int qed_vf_pf_vport_stop(struct qed_hwfn *p_hwfn)
 
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
+
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
@@ -770,13 +863,18 @@ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn,
 
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, resp_size);
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
 
        qed_vf_handle_vp_update_tlvs_resp(p_hwfn, p_params);
 
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
+
        return rc;
 }
 
@@ -797,14 +895,19 @@ int qed_vf_pf_reset(struct qed_hwfn *p_hwfn)
        resp = &p_iov->pf2vf_reply->default_resp;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EAGAIN;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EAGAIN;
+               goto exit;
+       }
 
        p_hwfn->b_int_enabled = 0;
 
-       return 0;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
+
+       return rc;
 }
 
 int qed_vf_pf_release(struct qed_hwfn *p_hwfn)
@@ -828,6 +931,8 @@ int qed_vf_pf_release(struct qed_hwfn *p_hwfn)
        if (!rc && resp->hdr.status != PFVF_STATUS_SUCCESS)
                rc = -EAGAIN;
 
+       qed_vf_pf_req_end(p_hwfn, rc);
+
        p_hwfn->b_int_enabled = 0;
 
        if (p_iov->vf2pf_request)
@@ -896,12 +1001,17 @@ int qed_vf_pf_filter_ucast(struct qed_hwfn *p_hwfn,
        resp = &p_iov->pf2vf_reply->default_resp;
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EAGAIN;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EAGAIN;
+               goto exit;
+       }
 
-       return 0;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
+
+       return rc;
 }
 
 int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn)
@@ -920,12 +1030,17 @@ int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn)
 
        rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
        if (rc)
-               return rc;
+               goto exit;
 
-       if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-               return -EINVAL;
+       if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+               rc = -EINVAL;
+               goto exit;
+       }
 
-       return 0;
+exit:
+       qed_vf_pf_req_end(p_hwfn, rc);
+
+       return rc;
 }
 
 u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
@@ -1071,8 +1186,8 @@ bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac)
        return false;
 }
 
-bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn,
-                                   u8 *dst_mac, u8 *p_is_forced)
+static bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn,
+                                          u8 *dst_mac, u8 *p_is_forced)
 {
        struct qed_bulletin_content *bulletin;
 
index b23ce58..35db7a2 100644 (file)
@@ -86,7 +86,7 @@ struct vfpf_acquire_tlv {
        struct vfpf_first_tlv first_tlv;
 
        struct vf_pf_vfdev_info {
-#define VFPF_ACQUIRE_CAP_OBSOLETE      (1 << 0)
+#define VFPF_ACQUIRE_CAP_PRE_FP_HSI     (1 << 0) /* VF pre-FP hsi version */
 #define VFPF_ACQUIRE_CAP_100G          (1 << 1) /* VF can support 100g */
                u64 capabilities;
                u8 fw_major;
@@ -551,6 +551,11 @@ struct qed_vf_iov {
 
        /* we set aside a copy of the acquire response */
        struct pfvf_acquire_resp_tlv acquire_resp;
+
+       /* Set when the PF predates the fp-hsi version comparison; it has
+        * to be propagated, as it affects the fastpath.
+        */
+       bool b_pre_fp_hsi;
 };
 
 #ifdef CONFIG_QED_SRIOV
index 02b06d4..e01adce 100644 (file)
@@ -25,7 +25,7 @@
 
 #define QEDE_MAJOR_VERSION             8
 #define QEDE_MINOR_VERSION             10
-#define QEDE_REVISION_VERSION          1
+#define QEDE_REVISION_VERSION          9
 #define QEDE_ENGINEERING_VERSION       20
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \
                __stringify(QEDE_MINOR_VERSION) "."             \
@@ -36,6 +36,8 @@
 
 struct qede_stats {
        u64 no_buff_discards;
+       u64 packet_too_big_discard;
+       u64 ttl0_discard;
        u64 rx_ucast_bytes;
        u64 rx_mcast_bytes;
        u64 rx_bcast_bytes;
@@ -124,16 +126,22 @@ struct qede_dev {
                                 (edev)->dev_info.num_tc)
 
        struct qede_fastpath            *fp_array;
-       u16                             req_rss;
-       u16                             num_rss;
+       u8                              req_num_tx;
+       u8                              fp_num_tx;
+       u8                              req_num_rx;
+       u8                              fp_num_rx;
+       u16                             req_queues;
+       u16                             num_queues;
        u8                              num_tc;
-#define QEDE_RSS_CNT(edev)             ((edev)->num_rss)
-#define QEDE_TSS_CNT(edev)             ((edev)->num_rss *      \
-                                        (edev)->num_tc)
-#define QEDE_TSS_IDX(edev, txqidx)     ((txqidx) % (edev)->num_rss)
-#define QEDE_TC_IDX(edev, txqidx)      ((txqidx) / (edev)->num_rss)
+#define QEDE_QUEUE_CNT(edev)   ((edev)->num_queues)
+#define QEDE_RSS_COUNT(edev)   ((edev)->num_queues - (edev)->fp_num_tx)
+#define QEDE_TSS_COUNT(edev)   (((edev)->num_queues - (edev)->fp_num_rx) * \
+                                (edev)->num_tc)
+#define QEDE_TX_IDX(edev, txqidx)      ((edev)->fp_num_rx + (txqidx) % \
+                                        QEDE_TSS_COUNT(edev))
+#define QEDE_TC_IDX(edev, txqidx)      ((txqidx) / QEDE_TSS_COUNT(edev))
 #define QEDE_TX_QUEUE(edev, txqidx)    \
-       (&(edev)->fp_array[QEDE_TSS_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX( \
+       (&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\
                                                        (edev), (txqidx))])
 
        struct qed_int_info             int_info;
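
A worked example of the new queue accounting may help (numbers illustrative, not from the patch): with num_queues = 8, fp_num_rx = 2, fp_num_tx = 2 and num_tc = 1, QEDE_QUEUE_CNT is 8, QEDE_RSS_COUNT is num_queues - fp_num_tx = 6 Rx-capable queues, and QEDE_TSS_COUNT is (num_queues - fp_num_rx) * num_tc = 6 Tx queues. QEDE_TX_IDX(edev, 0) then yields 2 + 0 % 6 = 2: the two Rx-only fastpaths come first in fp_array, and Tx queue 0 maps to the first Tx-capable fastpath after them.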
@@ -235,6 +243,7 @@ struct qede_rx_queue {
        u16                     num_rx_buffers;
        u16                     rxq_id;
 
+       u64                     rcv_pkts;
        u64                     rx_hw_errors;
        u64                     rx_alloc_errors;
        u64                     rx_ip_frags;
@@ -263,6 +272,10 @@ struct qede_tx_queue {
        union db_prod           tx_db;
 
        u16                     num_tx_buffers;
+       u64                     xmit_pkts;
+       u64                     stopped_cnt;
+
+       bool                    is_legacy;
 };
 
 #define BD_UNMAP_ADDR(bd)              HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -277,7 +290,11 @@ struct qede_tx_queue {
 
 struct qede_fastpath {
        struct qede_dev *edev;
-       u8                      rss_id;
+#define QEDE_FASTPATH_TX       BIT(0)
+#define QEDE_FASTPATH_RX       BIT(1)
+#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
+       u8                      type;
+       u8                      id;
        struct napi_struct      napi;
        struct qed_sb_info      *sb_info;
        struct qede_rx_queue    *rxq;
@@ -337,6 +354,6 @@ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev,
 
 #define QEDE_MIN_PKT_LEN       64
 #define QEDE_RX_HDR_SIZE       256
-#define        for_each_rss(i) for (i = 0; i < edev->num_rss; i++)
+#define        for_each_queue(i) for (i = 0; i < edev->num_queues; i++)
 
 #endif /* _QEDE_H_ */
index 427e043..25a9b29 100644 (file)
@@ -35,6 +35,7 @@ static const struct {
        u64 offset;
        char string[ETH_GSTRING_LEN];
 } qede_rqstats_arr[] = {
+       QEDE_RQSTAT(rcv_pkts),
        QEDE_RQSTAT(rx_hw_errors),
        QEDE_RQSTAT(rx_alloc_errors),
        QEDE_RQSTAT(rx_ip_frags),
@@ -44,6 +45,24 @@ static const struct {
 #define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \
        (*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\
                    qede_rqstats_arr[(sindex)].offset)))
+#define QEDE_TQSTAT_OFFSET(stat_name) \
+       (offsetof(struct qede_tx_queue, stat_name))
+#define QEDE_TQSTAT_STRING(stat_name) (#stat_name)
+#define QEDE_TQSTAT(stat_name) \
+       {QEDE_TQSTAT_OFFSET(stat_name), QEDE_TQSTAT_STRING(stat_name)}
+#define QEDE_NUM_TQSTATS ARRAY_SIZE(qede_tqstats_arr)
+static const struct {
+       u64 offset;
+       char string[ETH_GSTRING_LEN];
+} qede_tqstats_arr[] = {
+       QEDE_TQSTAT(xmit_pkts),
+       QEDE_TQSTAT(stopped_cnt),
+};
+
+#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \
+       (*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\
+                  qede_tqstats_arr[(sindex)].offset)))
+
 static const struct {
        u64 offset;
        char string[ETH_GSTRING_LEN];
@@ -107,6 +126,8 @@ static const struct {
        QEDE_PF_STAT(mftag_filter_discards),
        QEDE_PF_STAT(mac_filter_discards),
        QEDE_STAT(tx_err_drop_pkts),
+       QEDE_STAT(ttl0_discard),
+       QEDE_STAT(packet_too_big_discard),
 
        QEDE_STAT(coalesced_pkts),
        QEDE_STAT(coalesced_events),
@@ -151,17 +172,29 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 {
        int i, j, k;
 
+       for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+               int tc;
+
+               for (j = 0; j < QEDE_NUM_RQSTATS; j++)
+                       sprintf(buf + (k + j) * ETH_GSTRING_LEN,
+                               "%d:   %s", i, qede_rqstats_arr[j].string);
+               k += QEDE_NUM_RQSTATS;
+               for (tc = 0; tc < edev->num_tc; tc++) {
+                       for (j = 0; j < QEDE_NUM_TQSTATS; j++)
+                               sprintf(buf + (k + j) * ETH_GSTRING_LEN,
+                                       "%d.%d: %s", i, tc,
+                                       qede_tqstats_arr[j].string);
+                       k += QEDE_NUM_TQSTATS;
+               }
+       }
+
        for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) {
                if (IS_VF(edev) && qede_stats_arr[i].pf_only)
                        continue;
-               strcpy(buf + j * ETH_GSTRING_LEN,
+               strcpy(buf + (k + j) * ETH_GSTRING_LEN,
                       qede_stats_arr[i].string);
                j++;
        }
-
-       for (k = 0; k < QEDE_NUM_RQSTATS; k++, j++)
-               strcpy(buf + j * ETH_GSTRING_LEN,
-                      qede_rqstats_arr[k].string);
 }
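
With the per-queue string layout above, each queue contributes its own named block to the `ethtool -S` output ahead of the global statistics; an illustrative excerpt (queue ids, TC ids and values hypothetical):

	0:   rcv_pkts: 1024907
	0:   rx_hw_errors: 0
	0.0: xmit_pkts: 998311
	0.0: stopped_cnt: 0
	1:   rcv_pkts: 1031552
	...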
 
 static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
@@ -197,19 +230,30 @@ static void qede_get_ethtool_stats(struct net_device *dev,
 
        mutex_lock(&edev->qede_lock);
 
+       for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) {
+               int tc;
+
+               if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) {
+                       for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++)
+                               buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid);
+               }
+
+               if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) {
+                       for (tc = 0; tc < edev->num_tc; tc++) {
+                               for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++)
+                                       buf[cnt++] = QEDE_TQSTATS_DATA(edev,
+                                                                      sidx,
+                                                                      qid, tc);
+                       }
+               }
+       }
+
        for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) {
                if (IS_VF(edev) && qede_stats_arr[sidx].pf_only)
                        continue;
                buf[cnt++] = QEDE_STATS_DATA(edev, sidx);
        }
 
-       for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) {
-               buf[cnt] = 0;
-               for (qid = 0; qid < edev->num_rss; qid++)
-                       buf[cnt] += QEDE_RQSTATS_DATA(edev, sidx, qid);
-               cnt++;
-       }
-
        mutex_unlock(&edev->qede_lock);
 }
 
@@ -227,7 +271,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
                                if (qede_stats_arr[i].pf_only)
                                        num_stats--;
                }
-               return num_stats + QEDE_NUM_RQSTATS;
+               return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS +
+                      QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc;
        case ETH_SS_PRIV_FLAGS:
                return QEDE_PRI_FLAG_LEN;
        case ETH_SS_TEST:
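
As a worked check of the ETH_SS_STATS count (values illustrative; QEDE_NUM_RQSTATS taken as 5): with QEDE_RSS_COUNT = 6, QEDE_TSS_COUNT = 6, num_tc = 1 and QEDE_NUM_TQSTATS = 2, the function returns num_stats + 6 * 5 + 6 * 2 * 1 = num_stats + 42, matching one Rx-stats block per Rx queue plus one Tx-stats block per Tx queue per traffic class.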
@@ -375,7 +420,7 @@ static int qede_set_link_ksettings(struct net_device *dev,
                        }
                        params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
                        break;
-               case 0xdead:
+               case SPEED_50000:
                        if (!(current_link.supported_caps &
                              QED_LM_50000baseKR2_Full_BIT)) {
                                DP_INFO(edev, "50G speed not supported\n");
@@ -383,7 +428,7 @@ static int qede_set_link_ksettings(struct net_device *dev,
                        }
                        params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
                        break;
-               case 0xbeef:
+               case SPEED_100000:
                        if (!(current_link.supported_caps &
                              QED_LM_100000baseKR4_Full_BIT)) {
                                DP_INFO(edev, "100G speed not supported\n");
@@ -537,7 +582,7 @@ static int qede_set_coalesce(struct net_device *dev,
 
        rxc = (u16)coal->rx_coalesce_usecs;
        txc = (u16)coal->tx_coalesce_usecs;
-       for_each_rss(i) {
+       for_each_queue(i) {
                sb_id = edev->fp_array[i].sb_info->igu_sb_id;
                rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
                                                     (u8)i, sb_id);
@@ -633,7 +678,7 @@ static int qede_set_pauseparam(struct net_device *dev,
        memset(&params, 0, sizeof(params));
        params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG;
        if (epause->autoneg) {
-               if (!(current_link.supported_caps & SUPPORTED_Autoneg)) {
+               if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) {
                        DP_INFO(edev, "autoneg not supported\n");
                        return -EINVAL;
                }
@@ -650,6 +695,28 @@ static int qede_set_pauseparam(struct net_device *dev,
        return 0;
 }
 
+static void qede_get_regs(struct net_device *ndev,
+                         struct ethtool_regs *regs, void *buffer)
+{
+       struct qede_dev *edev = netdev_priv(ndev);
+
+       regs->version = 0;
+       memset(buffer, 0, regs->len);
+
+       if (edev->ops && edev->ops->common)
+               edev->ops->common->dbg_all_data(edev->cdev, buffer);
+}
+
+static int qede_get_regs_len(struct net_device *ndev)
+{
+       struct qede_dev *edev = netdev_priv(ndev);
+
+       if (edev->ops && edev->ops->common)
+               return edev->ops->common->dbg_all_data_size(edev->cdev);
+       else
+               return -EINVAL;
+}
+
 static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args)
 {
        edev->ndev->mtu = args->mtu;
@@ -689,45 +756,70 @@ static void qede_get_channels(struct net_device *dev,
        struct qede_dev *edev = netdev_priv(dev);
 
        channels->max_combined = QEDE_MAX_RSS_CNT(edev);
-       channels->combined_count = QEDE_RSS_CNT(edev);
+       channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx -
+                                       edev->fp_num_rx;
+       channels->tx_count = edev->fp_num_tx;
+       channels->rx_count = edev->fp_num_rx;
 }
 
 static int qede_set_channels(struct net_device *dev,
                             struct ethtool_channels *channels)
 {
        struct qede_dev *edev = netdev_priv(dev);
+       u32 count;
 
        DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                   "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n",
                   channels->rx_count, channels->tx_count,
                   channels->other_count, channels->combined_count);
 
-       /* We don't support separate rx / tx, nor `other' channels. */
-       if (channels->rx_count || channels->tx_count ||
-           channels->other_count || (channels->combined_count == 0) ||
-           (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) {
+       count = channels->rx_count + channels->tx_count +
+                       channels->combined_count;
+
+       /* We don't support `other' channels */
+       if (channels->other_count) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                           "command parameters not supported\n");
                return -EINVAL;
        }
 
+       if (!(channels->combined_count || (channels->rx_count &&
+                                          channels->tx_count))) {
+               DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+                          "need to request at least one transmit and one receive channel\n");
+               return -EINVAL;
+       }
+
+       if (count > QEDE_MAX_RSS_CNT(edev)) {
+               DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+                          "requested channels = %d max supported channels = %d\n",
+                          count, QEDE_MAX_RSS_CNT(edev));
+               return -EINVAL;
+       }
+
        /* Check if there was a change in the active parameters */
-       if (channels->combined_count == QEDE_RSS_CNT(edev)) {
+       if ((count == QEDE_QUEUE_CNT(edev)) &&
+           (channels->tx_count == edev->fp_num_tx) &&
+           (channels->rx_count == edev->fp_num_rx)) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                           "No change in active parameters\n");
                return 0;
        }
 
        /* We need the number of queues to be divisible by the number of hwfns */
-       if (channels->combined_count % edev->dev_info.common.num_hwfns) {
+       if ((count % edev->dev_info.common.num_hwfns) ||
+           (channels->tx_count % edev->dev_info.common.num_hwfns) ||
+           (channels->rx_count % edev->dev_info.common.num_hwfns)) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
-                          "Number of channels must be divisable by %04x\n",
+                          "Number of channels must be divisible by %04x\n",
                           edev->dev_info.common.num_hwfns);
                return -EINVAL;
        }
 
        /* Set number of queues and reload if necessary */
-       edev->req_rss = channels->combined_count;
+       edev->req_queues = count;
+       edev->req_num_tx = channels->tx_count;
+       edev->req_num_rx = channels->rx_count;
        if (netif_running(dev))
                qede_reload(edev, NULL, NULL);
 
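The validation above condenses to three rules; a minimal standalone sketch
(example_channels_ok() is illustrative, not driver code):

    static bool example_channels_ok(u32 rx, u32 tx, u32 combined,
                                    u32 max_cnt, u32 num_hwfns)
    {
            u32 count = rx + tx + combined;

            /* Need both a Tx and an Rx path: either via a combined
             * channel or via an explicit rx/tx pair.
             */
            if (!(combined || (rx && tx)))
                    return false;

            /* Total may not exceed what the device supports */
            if (count > max_cnt)
                    return false;

            /* Every queue type must split evenly across the hwfns */
            return !(count % num_hwfns) && !(tx % num_hwfns) &&
                   !(rx % num_hwfns);
    }

For example, `ethtool -L <dev> rx 2 tx 2 combined 4` passes on a 2-hwfn
device, while `rx 1 tx 2` fails the divisibility rule.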
@@ -797,7 +889,7 @@ static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 
        switch (info->cmd) {
        case ETHTOOL_GRXRINGS:
-               info->data = edev->num_rss;
+               info->data = QEDE_RSS_COUNT(edev);
                return 0;
        case ETHTOOL_GRXFH:
                return qede_get_rss_flags(edev, info);
@@ -1000,7 +1092,7 @@ static void qede_netif_start(struct qede_dev *edev)
        if (!netif_running(edev->ndev))
                return;
 
-       for_each_rss(i) {
+       for_each_queue(i) {
                /* Update and reenable interrupts */
                qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_ENABLE, 1);
                napi_enable(&edev->fp_array[i].napi);
@@ -1012,7 +1104,7 @@ static void qede_netif_stop(struct qede_dev *edev)
 {
        int i;
 
-       for_each_rss(i) {
+       for_each_queue(i) {
                napi_disable(&edev->fp_array[i].napi);
                /* Disable interrupts */
                qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_DISABLE, 0);
@@ -1022,11 +1114,23 @@ static void qede_netif_stop(struct qede_dev *edev)
 static int qede_selftest_transmit_traffic(struct qede_dev *edev,
                                          struct sk_buff *skb)
 {
-       struct qede_tx_queue *txq = &edev->fp_array[0].txqs[0];
+       struct qede_tx_queue *txq = NULL;
        struct eth_tx_1st_bd *first_bd;
        dma_addr_t mapping;
        int i, idx, val;
 
+       for_each_queue(i) {
+               if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+                       txq = edev->fp_array[i].txqs;
+                       break;
+               }
+       }
+
+       if (!txq) {
+               DP_NOTICE(edev, "Tx path is not available\n");
+               return -1;
+       }
+
        /* Fill the entry in the SW ring and the BDs in the FW ring */
        idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
        txq->sw_tx_ring[idx].skb = skb;
@@ -1090,14 +1194,26 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 
 static int qede_selftest_receive_traffic(struct qede_dev *edev)
 {
-       struct qede_rx_queue *rxq = edev->fp_array[0].rxq;
        u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len;
        struct eth_fast_path_rx_reg_cqe *fp_cqe;
+       struct qede_rx_queue *rxq = NULL;
        struct sw_rx_data *sw_rx_data;
        union eth_rx_cqe *cqe;
        u8 *data_ptr;
        int i;
 
+       for_each_queue(i) {
+               if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+                       rxq = edev->fp_array[i].rxq;
+                       break;
+               }
+       }
+
+       if (!rxq) {
+               DP_NOTICE(edev, "Rx path is not available\n");
+               return -1;
+       }
+
        /* The packet is expected to be received on rx-queue 0 even though
         * RSS is enabled. This is because queue 0 is configured as the
         * default queue and the loopback traffic is not IP.
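
Both selftest paths now locate the first fastpath of the required type
instead of assuming index 0 carries everything; the shared pattern,
schematically (example_find_fp() is a made-up helper):

    static struct qede_fastpath *example_find_fp(struct qede_dev *edev,
                                                 u8 type)
    {
            int i;

            for_each_queue(i)
                    if (edev->fp_array[i].type & type)
                            return &edev->fp_array[i];

            return NULL;    /* caller logs "path is not available" */
    }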
@@ -1301,6 +1417,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
        .get_link_ksettings = qede_get_link_ksettings,
        .set_link_ksettings = qede_set_link_ksettings,
        .get_drvinfo = qede_get_drvinfo,
+       .get_regs_len = qede_get_regs_len,
+       .get_regs = qede_get_regs,
        .get_msglevel = qede_get_msglevel,
        .set_msglevel = qede_set_msglevel,
        .nway_reset = qede_nway_reset,
index 5ce8a3c..cd23a29 100644 (file)
@@ -519,7 +519,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 
        /* Get tx-queue context and netdev index */
        txq_index = skb_get_queue_mapping(skb);
-       WARN_ON(txq_index >= QEDE_TSS_CNT(edev));
+       WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
        txq = QEDE_TX_QUEUE(edev, txq_index);
        netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
@@ -598,6 +598,14 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
                            1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
                }
 
+               /* Legacy FW had flipped behavior with regard to this bit -
+                * i.e., the bit needed to be set to prevent the FW from
+                * touching encapsulated packets when it didn't need to.
+                */
+               if (unlikely(txq->is_legacy))
+                       first_bd->data.bitfields ^=
+                           1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
+
                /* If the packet is IPv6 with extension header, indicate that
                 * to FW and pass few params, since the device cracker doesn't
                 * support parsing IPv6 with extension header/s.
@@ -714,12 +722,16 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
        txq->tx_db.data.bd_prod =
                cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-       if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq))
+       if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
                qede_update_tx_producer(txq);
 
        if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
                      < (MAX_SKB_FRAGS + 1))) {
+               if (skb->xmit_more)
+                       qede_update_tx_producer(txq);
+
                netif_tx_stop_queue(netdev_txq);
+               txq->stopped_cnt++;
                DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
                           "Stop queue was called\n");
                /* paired memory barrier is in qede_tx_int(), we have to keep
@@ -779,6 +791,7 @@ static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
                bytes_compl += len;
                pkts_compl++;
                txq->sw_tx_cons++;
+               txq->xmit_pkts++;
        }
 
        netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
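
The xmit_more changes above batch doorbell writes; the pattern, reduced to
a hedged sketch (ring_doorbell() and the ring_full flag are illustrative
stand-ins for qede_update_tx_producer() and the PBL-space check):

    if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
            ring_doorbell(txq);     /* flush the producer to HW now */

    if (ring_full) {
            if (skb->xmit_more)     /* write was deferred -- flush it */
                    ring_doorbell(txq);
            netif_tx_stop_queue(netdev_txq);
    }

Deferring the write while the stack promises more frames saves one MMIO
per skb, but the producer must still be rung before the queue stops.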
@@ -1193,7 +1206,7 @@ static void qede_gro_receive(struct qede_dev *edev,
 #endif
 
 send_skb:
-       skb_record_rx_queue(skb, fp->rss_id);
+       skb_record_rx_queue(skb, fp->rxq->rxq_id);
        qede_skb_receive(edev, fp, skb, vlan_tag);
 }
 
@@ -1397,7 +1410,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
 
                if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
                        edev->ops->eth_cqe_completion(
-                                       edev->cdev, fp->rss_id,
+                                       edev->cdev, fp->id,
                                        (struct eth_slow_path_rx_cqe *)cqe);
                        goto next_cqe;
                }
@@ -1568,7 +1581,7 @@ alloc_skb:
 
                qede_set_skb_csum(skb, csum_flag);
 
-               skb_record_rx_queue(skb, fp->rss_id);
+               skb_record_rx_queue(skb, fp->rxq->rxq_id);
 
                qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
 next_rx_only:
@@ -1587,6 +1600,8 @@ next_cqe: /* don't consume bd rx buffer */
        /* Update producers */
        qede_update_rx_prod(edev, rxq);
 
+       rxq->rcv_pkts += rx_pkt;
+
        return rx_pkt;
 }
 
@@ -1599,10 +1614,12 @@ static int qede_poll(struct napi_struct *napi, int budget)
        u8 tc;
 
        for (tc = 0; tc < edev->num_tc; tc++)
-               if (qede_txq_has_work(&fp->txqs[tc]))
+               if (likely(fp->type & QEDE_FASTPATH_TX) &&
+                   qede_txq_has_work(&fp->txqs[tc]))
                        qede_tx_int(edev, &fp->txqs[tc]);
 
-       rx_work_done = qede_has_rx_work(fp->rxq) ?
+       rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
+                       qede_has_rx_work(fp->rxq)) ?
                        qede_rx_int(fp, budget) : 0;
        if (rx_work_done < budget) {
                qed_sb_update_sb_idx(fp->sb_info);
@@ -1622,8 +1639,10 @@ static int qede_poll(struct napi_struct *napi, int budget)
                rmb();
 
                /* Fall out from the NAPI loop if needed */
-               if (!(qede_has_rx_work(fp->rxq) ||
-                     qede_has_tx_work(fp))) {
+               if (!((likely(fp->type & QEDE_FASTPATH_RX) &&
+                      qede_has_rx_work(fp->rxq)) ||
+                     (likely(fp->type & QEDE_FASTPATH_TX) &&
+                      qede_has_tx_work(fp)))) {
                        napi_complete(napi);
 
                        /* Update and reenable interrupts */
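
With fastpaths now typed, the poll routine only services the rings a given
fastpath owns; a skeleton of the resulting NAPI flow (the example_* names
are placeholders for the qede helpers):

    static int example_poll(struct napi_struct *napi, int budget)
    {
            struct qede_fastpath *fp =
                    container_of(napi, struct qede_fastpath, napi);
            int rx_done = 0;

            if (fp->type & QEDE_FASTPATH_TX)
                    example_tx_int(fp);             /* reap Tx completions */

            if (fp->type & QEDE_FASTPATH_RX)
                    rx_done = example_rx_int(fp, budget);

            if (rx_done < budget) {
                    napi_complete(napi);
                    /* re-check ring state, then re-enable the IRQ */
            }
            return rx_done;
    }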
@@ -1694,6 +1713,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
 
        edev->ops->get_vport_stats(edev->cdev, &stats);
        edev->stats.no_buff_discards = stats.no_buff_discards;
+       edev->stats.packet_too_big_discard = stats.packet_too_big_discard;
+       edev->stats.ttl0_discard = stats.ttl0_discard;
        edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes;
        edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes;
        edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes;
@@ -2095,7 +2116,7 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
        edev->accept_any_vlan = false;
 }
 
-int qede_set_features(struct net_device *dev, netdev_features_t features)
+static int qede_set_features(struct net_device *dev, netdev_features_t features)
 {
        struct qede_dev *edev = netdev_priv(dev);
        netdev_features_t changes = features ^ dev->features;
@@ -2335,7 +2356,7 @@ static void qede_free_fp_array(struct qede_dev *edev)
                struct qede_fastpath *fp;
                int i;
 
-               for_each_rss(i) {
+               for_each_queue(i) {
                        fp = &edev->fp_array[i];
 
                        kfree(fp->sb_info);
@@ -2344,22 +2365,33 @@ static void qede_free_fp_array(struct qede_dev *edev)
                }
                kfree(edev->fp_array);
        }
-       edev->num_rss = 0;
+
+       edev->num_queues = 0;
+       edev->fp_num_tx = 0;
+       edev->fp_num_rx = 0;
 }
 
 static int qede_alloc_fp_array(struct qede_dev *edev)
 {
+       u8 fp_combined, fp_rx = edev->fp_num_rx;
        struct qede_fastpath *fp;
        int i;
 
-       edev->fp_array = kcalloc(QEDE_RSS_CNT(edev),
+       edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev),
                                 sizeof(*edev->fp_array), GFP_KERNEL);
        if (!edev->fp_array) {
                DP_NOTICE(edev, "fp array allocation failed\n");
                goto err;
        }
 
-       for_each_rss(i) {
+       fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx;
+
+       /* Allocate the FP elements for Rx queues followed by combined and then
+        * the Tx. This ordering should be maintained so that the respective
+        * queues (Rx or Tx) will be together in the fastpath array and the
+        * associated ids will be sequential.
+        */
+       for_each_queue(i) {
                fp = &edev->fp_array[i];
 
                fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL);
@@ -2368,16 +2400,33 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
                        goto err;
                }
 
-               fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
-               if (!fp->rxq) {
-                       DP_NOTICE(edev, "RXQ struct allocation failed\n");
-                       goto err;
+               if (fp_rx) {
+                       fp->type = QEDE_FASTPATH_RX;
+                       fp_rx--;
+               } else if (fp_combined) {
+                       fp->type = QEDE_FASTPATH_COMBINED;
+                       fp_combined--;
+               } else {
+                       fp->type = QEDE_FASTPATH_TX;
                }
 
-               fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), GFP_KERNEL);
-               if (!fp->txqs) {
-                       DP_NOTICE(edev, "TXQ array allocation failed\n");
-                       goto err;
+               if (fp->type & QEDE_FASTPATH_TX) {
+                       fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs),
+                                          GFP_KERNEL);
+                       if (!fp->txqs) {
+                               DP_NOTICE(edev,
+                                         "TXQ array allocation failed\n");
+                               goto err;
+                       }
+               }
+
+               if (fp->type & QEDE_FASTPATH_RX) {
+                       fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
+                       if (!fp->rxq) {
+                               DP_NOTICE(edev,
+                                         "RXQ struct allocation failed\n");
+                               goto err;
+                       }
                }
        }
 
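To make the ordering comment concrete, assume (hypothetically) fp_num_rx =
2, two Tx-only fastpaths and three combined ones, so QEDE_QUEUE_CNT == 7:

    index:  0   1   2         3         4         5   6
    type :  RX  RX  COMBINED  COMBINED  COMBINED  TX  TX

Rx-capable entries occupy indices 0..4 and Tx-capable entries 2..6, so the
rxq ids (0..4) and txq ids (0..4) each come out sequential.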
@@ -2503,7 +2552,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
 #ifdef CONFIG_DCB
-       qede_set_dcbnl_ops(edev->ndev);
+       if (!IS_VF(edev))
+               qede_set_dcbnl_ops(edev->ndev);
 #endif
 
        INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
@@ -2591,8 +2641,8 @@ static int qede_set_num_queues(struct qede_dev *edev)
        u16 rss_num;
 
        /* Set up queues according to available resources */
-       if (edev->req_rss)
-               rss_num = edev->req_rss;
+       if (edev->req_queues)
+               rss_num = edev->req_queues;
        else
                rss_num = netif_get_num_default_rss_queues() *
                          edev->dev_info.common.num_hwfns;
@@ -2602,11 +2652,15 @@ static int qede_set_num_queues(struct qede_dev *edev)
        rc = edev->ops->common->set_fp_int(edev->cdev, rss_num);
        if (rc > 0) {
                /* Managed to request interrupts for our queues */
-               edev->num_rss = rc;
+               edev->num_queues = rc;
                DP_INFO(edev, "Managed %d [of %d] RSS queues\n",
-                       QEDE_RSS_CNT(edev), rss_num);
+                       QEDE_QUEUE_CNT(edev), rss_num);
                rc = 0;
        }
+
+       edev->fp_num_tx = edev->req_num_tx;
+       edev->fp_num_rx = edev->req_num_rx;
+
        return rc;
 }
 
@@ -2898,33 +2952,39 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 
        qede_free_mem_sb(edev, fp->sb_info);
 
-       qede_free_mem_rxq(edev, fp->rxq);
+       if (fp->type & QEDE_FASTPATH_RX)
+               qede_free_mem_rxq(edev, fp->rxq);
 
-       for (tc = 0; tc < edev->num_tc; tc++)
-               qede_free_mem_txq(edev, &fp->txqs[tc]);
+       if (fp->type & QEDE_FASTPATH_TX)
+               for (tc = 0; tc < edev->num_tc; tc++)
+                       qede_free_mem_txq(edev, &fp->txqs[tc]);
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
- * which contains status block, one rx queue and multiple per-TC tx queues.
+ * which contains a status block, one rx queue and/or multiple per-TC tx
+ * queues).
  */
 static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
        int rc, tc;
 
-       rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->rss_id);
+       rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
        if (rc)
                goto err;
 
-       rc = qede_alloc_mem_rxq(edev, fp->rxq);
-       if (rc)
-               goto err;
-
-       for (tc = 0; tc < edev->num_tc; tc++) {
-               rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
+       if (fp->type & QEDE_FASTPATH_RX) {
+               rc = qede_alloc_mem_rxq(edev, fp->rxq);
                if (rc)
                        goto err;
        }
 
+       if (fp->type & QEDE_FASTPATH_TX) {
+               for (tc = 0; tc < edev->num_tc; tc++) {
+                       rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
+                       if (rc)
+                               goto err;
+               }
+       }
+
        return 0;
 err:
        return rc;
@@ -2934,7 +2994,7 @@ static void qede_free_mem_load(struct qede_dev *edev)
 {
        int i;
 
-       for_each_rss(i) {
+       for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
 
                qede_free_mem_fp(edev, fp);
@@ -2944,16 +3004,16 @@ static void qede_free_mem_load(struct qede_dev *edev)
 /* This function allocates all qede memory at NIC load. */
 static int qede_alloc_mem_load(struct qede_dev *edev)
 {
-       int rc = 0, rss_id;
+       int rc = 0, queue_id;
 
-       for (rss_id = 0; rss_id < QEDE_RSS_CNT(edev); rss_id++) {
-               struct qede_fastpath *fp = &edev->fp_array[rss_id];
+       for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) {
+               struct qede_fastpath *fp = &edev->fp_array[queue_id];
 
                rc = qede_alloc_mem_fp(edev, fp);
                if (rc) {
                        DP_ERR(edev,
                               "Failed to allocate memory for fastpath - rss id = %d\n",
-                              rss_id);
+                              queue_id);
                        qede_free_mem_load(edev);
                        return rc;
                }
@@ -2965,30 +3025,38 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
-       int rss_id, txq_index, tc;
+       int queue_id, rxq_index = 0, txq_index = 0, tc;
        struct qede_fastpath *fp;
 
-       for_each_rss(rss_id) {
-               fp = &edev->fp_array[rss_id];
+       for_each_queue(queue_id) {
+               fp = &edev->fp_array[queue_id];
 
                fp->edev = edev;
-               fp->rss_id = rss_id;
+               fp->id = queue_id;
 
                memset((void *)&fp->napi, 0, sizeof(fp->napi));
 
                memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info));
 
-               memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
-               fp->rxq->rxq_id = rss_id;
+               if (fp->type & QEDE_FASTPATH_RX) {
+                       memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
+                       fp->rxq->rxq_id = rxq_index++;
+               }
 
-               memset((void *)fp->txqs, 0, (edev->num_tc * sizeof(*fp->txqs)));
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       txq_index = tc * QEDE_RSS_CNT(edev) + rss_id;
-                       fp->txqs[tc].index = txq_index;
+               if (fp->type & QEDE_FASTPATH_TX) {
+                       memset((void *)fp->txqs, 0,
+                              (edev->num_tc * sizeof(*fp->txqs)));
+                       for (tc = 0; tc < edev->num_tc; tc++) {
+                               fp->txqs[tc].index = txq_index +
+                                   tc * QEDE_TSS_COUNT(edev);
+                               if (edev->dev_info.is_legacy)
+                                       fp->txqs[tc].is_legacy = true;
+                       }
+                       txq_index++;
                }
 
                snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
-                        edev->ndev->name, rss_id);
+                        edev->ndev->name, queue_id);
        }
 
        edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO);
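
A worked example of the Tx index math above, with hypothetical sizes
QEDE_TSS_COUNT == 4 and num_tc == 2 (txqs[tc].index = txq_index +
tc * QEDE_TSS_COUNT):

    txq_index:    0  1  2  3
    tc 0 index:   0  1  2  3
    tc 1 index:   4  5  6  7

i.e. each traffic class owns a contiguous block of the device's Tx queue
space.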
@@ -2998,12 +3066,13 @@ static int qede_set_real_num_queues(struct qede_dev *edev)
 {
        int rc = 0;
 
-       rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_CNT(edev));
+       rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev));
        if (rc) {
                DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
                return rc;
        }
-       rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_CNT(edev));
+
+       rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev));
        if (rc) {
                DP_NOTICE(edev, "Failed to set real number of Rx queues\n");
                return rc;
@@ -3016,7 +3085,7 @@ static void qede_napi_disable_remove(struct qede_dev *edev)
 {
        int i;
 
-       for_each_rss(i) {
+       for_each_queue(i) {
                napi_disable(&edev->fp_array[i].napi);
 
                netif_napi_del(&edev->fp_array[i].napi);
@@ -3028,7 +3097,7 @@ static void qede_napi_add_enable(struct qede_dev *edev)
        int i;
 
        /* Add NAPI objects */
-       for_each_rss(i) {
+       for_each_queue(i) {
                netif_napi_add(edev->ndev, &edev->fp_array[i].napi,
                               qede_poll, NAPI_POLL_WEIGHT);
                napi_enable(&edev->fp_array[i].napi);
@@ -3057,14 +3126,14 @@ static int qede_req_msix_irqs(struct qede_dev *edev)
        int i, rc;
 
        /* Sanitize number of interrupts == number of prepared RSS queues */
-       if (QEDE_RSS_CNT(edev) > edev->int_info.msix_cnt) {
+       if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) {
                DP_ERR(edev,
                       "Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n",
-                      QEDE_RSS_CNT(edev), edev->int_info.msix_cnt);
+                      QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt);
                return -EINVAL;
        }
 
-       for (i = 0; i < QEDE_RSS_CNT(edev); i++) {
+       for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
                rc = request_irq(edev->int_info.msix[i].vector,
                                 qede_msix_fp_int, 0, edev->fp_array[i].name,
                                 &edev->fp_array[i]);
@@ -3109,11 +3178,11 @@ static int qede_setup_irqs(struct qede_dev *edev)
 
                /* qed should learn to receive the RSS ids and callbacks */
                ops = edev->ops->common;
-               for (i = 0; i < QEDE_RSS_CNT(edev); i++)
+               for (i = 0; i < QEDE_QUEUE_CNT(edev); i++)
                        ops->simd_handler_config(edev->cdev,
                                                 &edev->fp_array[i], i,
                                                 qede_simd_fp_handler);
-               edev->int_info.used_cnt = QEDE_RSS_CNT(edev);
+               edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev);
        }
        return 0;
 }
@@ -3171,45 +3240,53 @@ static int qede_stop_queues(struct qede_dev *edev)
        }
 
        /* Flush Tx queues. If needed, request drain from MCP */
-       for_each_rss(i) {
+       for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
 
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       struct qede_tx_queue *txq = &fp->txqs[tc];
+               if (fp->type & QEDE_FASTPATH_TX) {
+                       for (tc = 0; tc < edev->num_tc; tc++) {
+                               struct qede_tx_queue *txq = &fp->txqs[tc];
 
-                       rc = qede_drain_txq(edev, txq, true);
-                       if (rc)
-                               return rc;
+                               rc = qede_drain_txq(edev, txq, true);
+                               if (rc)
+                                       return rc;
+                       }
                }
        }
 
-       /* Stop all Queues in reverse order*/
-       for (i = QEDE_RSS_CNT(edev) - 1; i >= 0; i--) {
+       /* Stop all Queues in reverse order */
+       for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
                struct qed_stop_rxq_params rx_params;
 
-               /* Stop the Tx Queue(s)*/
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       struct qed_stop_txq_params tx_params;
-
-                       tx_params.rss_id = i;
-                       tx_params.tx_queue_id = tc * QEDE_RSS_CNT(edev) + i;
-                       rc = edev->ops->q_tx_stop(cdev, &tx_params);
-                       if (rc) {
-                               DP_ERR(edev, "Failed to stop TXQ #%d\n",
-                                      tx_params.tx_queue_id);
-                               return rc;
+               /* Stop the Tx Queue(s) */
+               if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+                       for (tc = 0; tc < edev->num_tc; tc++) {
+                               struct qed_stop_txq_params tx_params;
+                               u8 val;
+
+                               tx_params.rss_id = i;
+                               val = edev->fp_array[i].txqs[tc].index;
+                               tx_params.tx_queue_id = val;
+                               rc = edev->ops->q_tx_stop(cdev, &tx_params);
+                               if (rc) {
+                                       DP_ERR(edev, "Failed to stop TXQ #%d\n",
+                                              tx_params.tx_queue_id);
+                                       return rc;
+                               }
                        }
                }
 
-               /* Stop the Rx Queue*/
-               memset(&rx_params, 0, sizeof(rx_params));
-               rx_params.rss_id = i;
-               rx_params.rx_queue_id = i;
+               /* Stop the Rx Queue */
+               if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+                       memset(&rx_params, 0, sizeof(rx_params));
+                       rx_params.rss_id = i;
+                       rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id;
 
-               rc = edev->ops->q_rx_stop(cdev, &rx_params);
-               if (rc) {
-                       DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
-                       return rc;
+                       rc = edev->ops->q_rx_stop(cdev, &rx_params);
+                       if (rc) {
+                               DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
+                               return rc;
+                       }
                }
        }
 
@@ -3232,7 +3309,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
        struct qed_start_vport_params start = {0};
        bool reset_rss_indir = false;
 
-       if (!edev->num_rss) {
+       if (!edev->num_queues) {
                DP_ERR(edev,
                       "Cannot update V-VPORT as active as there are no Rx queues\n");
                return -EINVAL;
@@ -3256,50 +3333,66 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
                   "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n",
                   start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
 
-       for_each_rss(i) {
+       for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
-               dma_addr_t phys_table = fp->rxq->rx_comp_ring.pbl.p_phys_table;
-
-               memset(&q_params, 0, sizeof(q_params));
-               q_params.rss_id = i;
-               q_params.queue_id = i;
-               q_params.vport_id = 0;
-               q_params.sb = fp->sb_info->igu_sb_id;
-               q_params.sb_idx = RX_PI;
-
-               rc = edev->ops->q_rx_start(cdev, &q_params,
-                                          fp->rxq->rx_buf_size,
-                                          fp->rxq->rx_bd_ring.p_phys_addr,
-                                          phys_table,
-                                          fp->rxq->rx_comp_ring.page_cnt,
-                                          &fp->rxq->hw_rxq_prod_addr);
-               if (rc) {
-                       DP_ERR(edev, "Start RXQ #%d failed %d\n", i, rc);
-                       return rc;
-               }
+               dma_addr_t p_phys_table;
+               u32 page_cnt;
 
-               fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI];
+               if (fp->type & QEDE_FASTPATH_RX) {
+                       struct qede_rx_queue *rxq = fp->rxq;
+                       __le16 *val;
 
-               qede_update_rx_prod(edev, fp->rxq);
+                       memset(&q_params, 0, sizeof(q_params));
+                       q_params.rss_id = i;
+                       q_params.queue_id = rxq->rxq_id;
+                       q_params.vport_id = 0;
+                       q_params.sb = fp->sb_info->igu_sb_id;
+                       q_params.sb_idx = RX_PI;
+
+                       p_phys_table =
+                           qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
+                       page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
+
+                       rc = edev->ops->q_rx_start(cdev, &q_params,
+                                                  rxq->rx_buf_size,
+                                                  rxq->rx_bd_ring.p_phys_addr,
+                                                  p_phys_table,
+                                                  page_cnt,
+                                                  &rxq->hw_rxq_prod_addr);
+                       if (rc) {
+                               DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
+                                      rc);
+                               return rc;
+                       }
+
+                       val = &fp->sb_info->sb_virt->pi_array[RX_PI];
+                       rxq->hw_cons_ptr = val;
+
+                       qede_update_rx_prod(edev, rxq);
+               }
+
+               if (!(fp->type & QEDE_FASTPATH_TX))
+                       continue;
 
                for (tc = 0; tc < edev->num_tc; tc++) {
                        struct qede_tx_queue *txq = &fp->txqs[tc];
-                       int txq_index = tc * QEDE_RSS_CNT(edev) + i;
+
+                       p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
+                       page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
 
                        memset(&q_params, 0, sizeof(q_params));
                        q_params.rss_id = i;
-                       q_params.queue_id = txq_index;
+                       q_params.queue_id = txq->index;
                        q_params.vport_id = 0;
                        q_params.sb = fp->sb_info->igu_sb_id;
                        q_params.sb_idx = TX_PI(tc);
 
                        rc = edev->ops->q_tx_start(cdev, &q_params,
-                                                  txq->tx_pbl.pbl.p_phys_table,
-                                                  txq->tx_pbl.page_cnt,
+                                                  p_phys_table, page_cnt,
                                                   &txq->doorbell_addr);
                        if (rc) {
                                DP_ERR(edev, "Start TXQ #%d failed %d\n",
-                                      txq_index, rc);
+                                      txq->index, rc);
                                return rc;
                        }
 
@@ -3330,13 +3423,13 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
        }
 
        /* Fill struct with RSS params */
-       if (QEDE_RSS_CNT(edev) > 1) {
+       if (QEDE_RSS_COUNT(edev) > 1) {
                vport_update_params.update_rss_flg = 1;
 
                /* Need to validate current RSS config uses valid entries */
                for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
                        if (edev->rss_params.rss_ind_table[i] >=
-                           edev->num_rss) {
+                           QEDE_RSS_COUNT(edev)) {
                                reset_rss_indir = true;
                                break;
                        }
@@ -3349,7 +3442,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
                        for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
                                u16 indir_val;
 
-                               val = QEDE_RSS_CNT(edev);
+                               val = QEDE_RSS_COUNT(edev);
                                indir_val = ethtool_rxfh_indir_default(i, val);
                                edev->rss_params.rss_ind_table[i] = indir_val;
                        }
@@ -3478,7 +3571,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
        if (rc)
                goto err1;
        DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n",
-               QEDE_RSS_CNT(edev), edev->num_tc);
+               QEDE_QUEUE_CNT(edev), edev->num_tc);
 
        rc = qede_set_real_num_queues(edev);
        if (rc)
@@ -3531,7 +3624,9 @@ err2:
 err1:
        edev->ops->common->set_fp_int(edev->cdev, 0);
        qede_free_fp_array(edev);
-       edev->num_rss = 0;
+       edev->num_queues = 0;
+       edev->fp_num_tx = 0;
+       edev->fp_num_rx = 0;
 err0:
        return rc;
 }
index 3ebef27..3ae3968 100644 (file)
@@ -432,18 +432,19 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb,
                        struct net_device *netdev,
-                       struct net_device *filter_dev, int idx)
+                       struct net_device *filter_dev, int *idx)
 {
        struct qlcnic_adapter *adapter = netdev_priv(netdev);
+       int err = 0;
 
        if (!adapter->fdb_mac_learn)
                return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
 
        if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
            qlcnic_sriov_check(adapter))
-               idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
+               err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
 
-       return idx;
+       return err;
 }
 
 static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter)
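
This follows the ndo_fdb_dump() interface change in this cycle: the dump
position moves behind an int pointer that the core owns, and the return
value becomes a plain error code. Schematically (illustrative body only):

    int example_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                         struct net_device *dev,
                         struct net_device *filter_dev, int *idx)
    {
            int err = 0;

            /* Emit one netlink attribute set per FDB entry, doing
             * (*idx)++ for each, and stop early once the skb fills up;
             * the core resumes from *idx on the next dump call.
             */
            return err;
    }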
index a76e380..9ba568d 100644 (file)
@@ -24,4 +24,16 @@ config QCA7000
          To compile this driver as a module, choose M here. The module
          will be called qcaspi.
 
+config QCOM_EMAC
+       tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support"
+       select CRC32
+       select PHYLIB
+       ---help---
+         This driver supports the Qualcomm Technologies, Inc. Gigabit
+         Ethernet Media Access Controller (EMAC). The controller
+         supports IEEE 802.3-2002, half-duplex mode at 10/100 Mb/s,
+         full-duplex mode at 10/100/1000Mb/s, Wake On LAN (WOL) for
+         full-duplex mode at 10/100/1000 Mb/s, Wake On LAN (WOL) for
+         Precision Clock Synchronization Protocol.
+
 endif # NET_VENDOR_QUALCOMM
index 9da2d75..aacb0a5 100644 (file)
@@ -4,3 +4,5 @@
 
 obj-$(CONFIG_QCA7000) += qcaspi.o
 qcaspi-objs := qca_spi.o qca_framing.o qca_7k.o qca_debug.o
+
+obj-y += emac/
diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
new file mode 100644 (file)
index 0000000..01ee144
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver
+#
+
+obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o
+
+qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
new file mode 100644 (file)
index 0000000..e97968e
--- /dev/null
@@ -0,0 +1,1528 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support
+ */
+
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/crc32.h>
+#include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <net/ip6_checksum.h>
+#include "emac.h"
+#include "emac-sgmii.h"
+
+/* EMAC base register offsets */
+#define EMAC_MAC_CTRL                  0x001480
+#define EMAC_WOL_CTRL0                 0x0014a0
+#define EMAC_RSS_KEY0                  0x0014b0
+#define EMAC_H1TPD_BASE_ADDR_LO                0x0014e0
+#define EMAC_H2TPD_BASE_ADDR_LO                0x0014e4
+#define EMAC_H3TPD_BASE_ADDR_LO                0x0014e8
+#define EMAC_INTER_SRAM_PART9          0x001534
+#define EMAC_DESC_CTRL_0               0x001540
+#define EMAC_DESC_CTRL_1               0x001544
+#define EMAC_DESC_CTRL_2               0x001550
+#define EMAC_DESC_CTRL_10              0x001554
+#define EMAC_DESC_CTRL_12              0x001558
+#define EMAC_DESC_CTRL_13              0x00155c
+#define EMAC_DESC_CTRL_3               0x001560
+#define EMAC_DESC_CTRL_4               0x001564
+#define EMAC_DESC_CTRL_5               0x001568
+#define EMAC_DESC_CTRL_14              0x00156c
+#define EMAC_DESC_CTRL_15              0x001570
+#define EMAC_DESC_CTRL_16              0x001574
+#define EMAC_DESC_CTRL_6               0x001578
+#define EMAC_DESC_CTRL_8               0x001580
+#define EMAC_DESC_CTRL_9               0x001584
+#define EMAC_DESC_CTRL_11              0x001588
+#define EMAC_TXQ_CTRL_0                        0x001590
+#define EMAC_TXQ_CTRL_1                        0x001594
+#define EMAC_TXQ_CTRL_2                        0x001598
+#define EMAC_RXQ_CTRL_0                        0x0015a0
+#define EMAC_RXQ_CTRL_1                        0x0015a4
+#define EMAC_RXQ_CTRL_2                        0x0015a8
+#define EMAC_RXQ_CTRL_3                        0x0015ac
+#define EMAC_BASE_CPU_NUMBER           0x0015b8
+#define EMAC_DMA_CTRL                  0x0015c0
+#define EMAC_MAILBOX_0                 0x0015e0
+#define EMAC_MAILBOX_5                 0x0015e4
+#define EMAC_MAILBOX_6                 0x0015e8
+#define EMAC_MAILBOX_13                        0x0015ec
+#define EMAC_MAILBOX_2                 0x0015f4
+#define EMAC_MAILBOX_3                 0x0015f8
+#define EMAC_MAILBOX_11                        0x00160c
+#define EMAC_AXI_MAST_CTRL             0x001610
+#define EMAC_MAILBOX_12                        0x001614
+#define EMAC_MAILBOX_9                 0x001618
+#define EMAC_MAILBOX_10                        0x00161c
+#define EMAC_ATHR_HEADER_CTRL          0x001620
+#define EMAC_CLK_GATE_CTRL             0x001814
+#define EMAC_MISC_CTRL                 0x001990
+#define EMAC_MAILBOX_7                 0x0019e0
+#define EMAC_MAILBOX_8                 0x0019e4
+#define EMAC_MAILBOX_15                        0x001bd4
+#define EMAC_MAILBOX_16                        0x001bd8
+
+/* EMAC_MAC_CTRL */
+#define SINGLE_PAUSE_MODE              0x10000000
+#define DEBUG_MODE                      0x08000000
+#define BROAD_EN                        0x04000000
+#define MULTI_ALL                       0x02000000
+#define RX_CHKSUM_EN                    0x01000000
+#define HUGE                            0x00800000
+#define SPEED(x)                       (((x) & 0x3) << 20)
+#define SPEED_MASK                     SPEED(0x3)
+#define SIMR                            0x00080000
+#define TPAUSE                          0x00010000
+#define PROM_MODE                       0x00008000
+#define VLAN_STRIP                      0x00004000
+#define PRLEN_BMSK                      0x00003c00
+#define PRLEN_SHFT                      10
+#define HUGEN                           0x00000200
+#define FLCHK                           0x00000100
+#define PCRCE                           0x00000080
+#define CRCE                            0x00000040
+#define FULLD                           0x00000020
+#define MAC_LP_EN                       0x00000010
+#define RXFC                            0x00000008
+#define TXFC                            0x00000004
+#define RXEN                            0x00000002
+#define TXEN                            0x00000001
+
+/* EMAC_WOL_CTRL0 */
+#define LK_CHG_PME                     0x20
+#define LK_CHG_EN                      0x10
+#define MG_FRAME_PME                   0x8
+#define MG_FRAME_EN                    0x4
+#define WK_FRAME_EN                    0x1
+
+/* EMAC_DESC_CTRL_3 */
+#define RFD_RING_SIZE_BMSK                                       0xfff
+
+/* EMAC_DESC_CTRL_4 */
+#define RX_BUFFER_SIZE_BMSK                                     0xffff
+
+/* EMAC_DESC_CTRL_6 */
+#define RRD_RING_SIZE_BMSK                                       0xfff
+
+/* EMAC_DESC_CTRL_9 */
+#define TPD_RING_SIZE_BMSK                                      0xffff
+
+/* EMAC_TXQ_CTRL_0 */
+#define NUM_TXF_BURST_PREF_BMSK                             0xffff0000
+#define NUM_TXF_BURST_PREF_SHFT                                     16
+#define LS_8023_SP                                                0x80
+#define TXQ_MODE                                                  0x40
+#define TXQ_EN                                                    0x20
+#define IP_OP_SP                                                  0x10
+#define NUM_TPD_BURST_PREF_BMSK                                    0xf
+#define NUM_TPD_BURST_PREF_SHFT                                      0
+
+/* EMAC_TXQ_CTRL_1 */
+#define JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK                        0x7ff
+
+/* EMAC_TXQ_CTRL_2 */
+#define TXF_HWM_BMSK                                         0xfff0000
+#define TXF_LWM_BMSK                                             0xfff
+
+/* EMAC_RXQ_CTRL_0 */
+#define RXQ_EN                                                 BIT(31)
+#define CUT_THRU_EN                                            BIT(30)
+#define RSS_HASH_EN                                            BIT(29)
+#define NUM_RFD_BURST_PREF_BMSK                              0x3f00000
+#define NUM_RFD_BURST_PREF_SHFT                                     20
+#define IDT_TABLE_SIZE_BMSK                                    0x1ff00
+#define IDT_TABLE_SIZE_SHFT                                          8
+#define SP_IPV6                                                   0x80
+
+/* EMAC_RXQ_CTRL_1 */
+#define JUMBO_1KAH_BMSK                                         0xf000
+#define JUMBO_1KAH_SHFT                                             12
+#define RFD_PREF_LOW_TH                                           0x10
+#define RFD_PREF_LOW_THRESHOLD_BMSK                              0xfc0
+#define RFD_PREF_LOW_THRESHOLD_SHFT                                  6
+#define RFD_PREF_UP_TH                                            0x10
+#define RFD_PREF_UP_THRESHOLD_BMSK                                0x3f
+#define RFD_PREF_UP_THRESHOLD_SHFT                                   0
+
+/* EMAC_RXQ_CTRL_2 */
+#define RXF_DOF_THRESHOLD                                        0x1a0
+#define RXF_DOF_THRESHOLD_BMSK                               0xfff0000
+#define RXF_DOF_THRESHOLD_SHFT                                      16
+#define RXF_UOF_THRESHOLD                                         0xbe
+#define RXF_UOF_THRESHOLD_BMSK                                   0xfff
+#define RXF_UOF_THRESHOLD_SHFT                                       0
+
+/* EMAC_RXQ_CTRL_3 */
+#define RXD_TIMER_BMSK                                      0xffff0000
+#define RXD_THRESHOLD_BMSK                                       0xfff
+#define RXD_THRESHOLD_SHFT                                           0
+
+/* EMAC_DMA_CTRL */
+#define DMAW_DLY_CNT_BMSK                                      0xf0000
+#define DMAW_DLY_CNT_SHFT                                           16
+#define DMAR_DLY_CNT_BMSK                                       0xf800
+#define DMAR_DLY_CNT_SHFT                                           11
+#define DMAR_REQ_PRI                                             0x400
+#define REGWRBLEN_BMSK                                           0x380
+#define REGWRBLEN_SHFT                                               7
+#define REGRDBLEN_BMSK                                            0x70
+#define REGRDBLEN_SHFT                                               4
+#define OUT_ORDER_MODE                                             0x4
+#define ENH_ORDER_MODE                                             0x2
+#define IN_ORDER_MODE                                              0x1
+
+/* EMAC_MAILBOX_13 */
+#define RFD3_PROC_IDX_BMSK                                   0xfff0000
+#define RFD3_PROC_IDX_SHFT                                          16
+#define RFD3_PROD_IDX_BMSK                                       0xfff
+#define RFD3_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_2 */
+#define NTPD_CONS_IDX_BMSK                                  0xffff0000
+#define NTPD_CONS_IDX_SHFT                                          16
+
+/* EMAC_MAILBOX_3 */
+#define RFD0_CONS_IDX_BMSK                                       0xfff
+#define RFD0_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_11 */
+#define H3TPD_PROD_IDX_BMSK                                 0xffff0000
+#define H3TPD_PROD_IDX_SHFT                                         16
+
+/* EMAC_AXI_MAST_CTRL */
+#define DATA_BYTE_SWAP                                             0x8
+#define MAX_BOUND                                                  0x2
+#define MAX_BTYPE                                                  0x1
+
+/* EMAC_MAILBOX_12 */
+#define H3TPD_CONS_IDX_BMSK                                 0xffff0000
+#define H3TPD_CONS_IDX_SHFT                                         16
+
+/* EMAC_MAILBOX_9 */
+#define H2TPD_PROD_IDX_BMSK                                     0xffff
+#define H2TPD_PROD_IDX_SHFT                                          0
+
+/* EMAC_MAILBOX_10 */
+#define H1TPD_CONS_IDX_BMSK                                 0xffff0000
+#define H1TPD_CONS_IDX_SHFT                                         16
+#define H2TPD_CONS_IDX_BMSK                                     0xffff
+#define H2TPD_CONS_IDX_SHFT                                          0
+
+/* EMAC_ATHR_HEADER_CTRL */
+#define HEADER_CNT_EN                                              0x2
+#define HEADER_ENABLE                                              0x1
+
+/* EMAC_MAILBOX_0 */
+#define RFD0_PROC_IDX_BMSK                                   0xfff0000
+#define RFD0_PROC_IDX_SHFT                                          16
+#define RFD0_PROD_IDX_BMSK                                       0xfff
+#define RFD0_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_5 */
+#define RFD1_PROC_IDX_BMSK                                   0xfff0000
+#define RFD1_PROC_IDX_SHFT                                          16
+#define RFD1_PROD_IDX_BMSK                                       0xfff
+#define RFD1_PROD_IDX_SHFT                                           0
+
+/* EMAC_MISC_CTRL */
+#define RX_UNCPL_INT_EN                                            0x1
+
+/* EMAC_MAILBOX_7 */
+#define RFD2_CONS_IDX_BMSK                                   0xfff0000
+#define RFD2_CONS_IDX_SHFT                                          16
+#define RFD1_CONS_IDX_BMSK                                       0xfff
+#define RFD1_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_8 */
+#define RFD3_CONS_IDX_BMSK                                       0xfff
+#define RFD3_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_15 */
+#define NTPD_PROD_IDX_BMSK                                      0xffff
+#define NTPD_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_16 */
+#define H1TPD_PROD_IDX_BMSK                                     0xffff
+#define H1TPD_PROD_IDX_SHFT                                          0
+
+#define RXQ0_RSS_HSTYP_IPV6_TCP_EN                                0x20
+#define RXQ0_RSS_HSTYP_IPV6_EN                                    0x10
+#define RXQ0_RSS_HSTYP_IPV4_TCP_EN                                 0x8
+#define RXQ0_RSS_HSTYP_IPV4_EN                                     0x4
+
+/* EMAC_EMAC_WRAPPER_TX_TS_INX */
+#define EMAC_WRAPPER_TX_TS_EMPTY                               BIT(31)
+#define EMAC_WRAPPER_TX_TS_INX_BMSK                             0xffff
+
+struct emac_skb_cb {
+       u32           tpd_idx;
+       unsigned long jiffies;
+};
+
+#define EMAC_SKB_CB(skb)       ((struct emac_skb_cb *)(skb)->cb)
+#define EMAC_RSS_IDT_SIZE      256
+#define JUMBO_1KAH             0x4
+#define RXD_TH                 0x100
+#define EMAC_TPD_LAST_FRAGMENT 0x80000000
+#define EMAC_TPD_TSTAMP_SAVE   0x80000000
+
+/* EMAC Errors in emac_rrd.word[3] */
+#define EMAC_RRD_L4F           BIT(14)
+#define EMAC_RRD_IPF           BIT(15)
+#define EMAC_RRD_CRC           BIT(21)
+#define EMAC_RRD_FAE           BIT(22)
+#define EMAC_RRD_TRN           BIT(23)
+#define EMAC_RRD_RNT           BIT(24)
+#define EMAC_RRD_INC           BIT(25)
+#define EMAC_RRD_FOV           BIT(29)
+#define EMAC_RRD_LEN           BIT(30)
+
+/* Error bits that will result in a received frame being discarded */
+#define EMAC_RRD_ERROR (EMAC_RRD_IPF | EMAC_RRD_CRC | EMAC_RRD_FAE | \
+                       EMAC_RRD_TRN | EMAC_RRD_RNT | EMAC_RRD_INC | \
+                       EMAC_RRD_FOV | EMAC_RRD_LEN)
+#define EMAC_RRD_STATS_DW_IDX 3
+
+#define EMAC_RRD(RXQ, SIZE, IDX)       ((RXQ)->rrd.v_addr + (SIZE * (IDX)))
+#define EMAC_RFD(RXQ, SIZE, IDX)       ((RXQ)->rfd.v_addr + (SIZE * (IDX)))
+#define EMAC_TPD(TXQ, SIZE, IDX)       ((TXQ)->tpd.v_addr + (SIZE * (IDX)))
+
+#define GET_RFD_BUFFER(RXQ, IDX)       (&((RXQ)->rfd.rfbuff[(IDX)]))
+#define GET_TPD_BUFFER(RTQ, IDX)       (&((RTQ)->tpd.tpbuff[(IDX)]))
+
+#define EMAC_TX_POLL_HWTXTSTAMP_THRESHOLD      8
+
+#define ISR_RX_PKT      (\
+       RX_PKT_INT0     |\
+       RX_PKT_INT1     |\
+       RX_PKT_INT2     |\
+       RX_PKT_INT3)
+
+#define EMAC_MAC_IRQ_RES                                       "core0"
+
+void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr)
+{
+       u32 crc32, bit, reg, mta;
+
+       /* Calculate the CRC of the MAC address */
+       crc32 = ether_crc(ETH_ALEN, addr);
+
+       /* The HASH Table is an array of 2 32-bit registers. It is
+        * treated like an array of 64 bits (BitArray[hash_value]).
+        * Use the upper 6 bits of the above CRC as the hash value.
+        */
+       reg = (crc32 >> 31) & 0x1;
+       bit = (crc32 >> 26) & 0x1F;
+
+       mta = readl(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
+       mta |= BIT(bit);
+       writel(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
+}
+
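+/* Worked example (illustrative values, not driver code): for
+ * crc32 = 0xb6000000,
+ *   reg = (crc32 >> 31) & 0x1  = 1  -> EMAC_HASH_TAB_REG1
+ *   bit = (crc32 >> 26) & 0x1f = 13 -> set BIT(13) in that register
+ */
+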
+void emac_mac_multicast_addr_clear(struct emac_adapter *adpt)
+{
+       writel(0, adpt->base + EMAC_HASH_TAB_REG0);
+       writel(0, adpt->base + EMAC_HASH_TAB_REG1);
+}
+
+/* definitions for RSS */
+#define EMAC_RSS_KEY(_i, _type) \
+               (EMAC_RSS_KEY0 + ((_i) * sizeof(_type)))
+#define EMAC_RSS_TBL(_i, _type) \
+               (EMAC_IDT_TABLE0 + ((_i) * sizeof(_type)))
+
+/* Config MAC modes */
+void emac_mac_mode_config(struct emac_adapter *adpt)
+{
+       struct net_device *netdev = adpt->netdev;
+       u32 mac;
+
+       mac = readl(adpt->base + EMAC_MAC_CTRL);
+       mac &= ~(VLAN_STRIP | PROM_MODE | MULTI_ALL | MAC_LP_EN);
+
+       if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+               mac |= VLAN_STRIP;
+
+       if (netdev->flags & IFF_PROMISC)
+               mac |= PROM_MODE;
+
+       if (netdev->flags & IFF_ALLMULTI)
+               mac |= MULTI_ALL;
+
+       writel(mac, adpt->base + EMAC_MAC_CTRL);
+}
+
+/* Config descriptor rings */
+static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
+{
+       static const unsigned short tpd_q_offset[] = {
+               EMAC_DESC_CTRL_8,        EMAC_H1TPD_BASE_ADDR_LO,
+               EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO};
+       static const unsigned short rfd_q_offset[] = {
+               EMAC_DESC_CTRL_2,        EMAC_DESC_CTRL_10,
+               EMAC_DESC_CTRL_12,       EMAC_DESC_CTRL_13};
+       static const unsigned short rrd_q_offset[] = {
+               EMAC_DESC_CTRL_5,        EMAC_DESC_CTRL_14,
+               EMAC_DESC_CTRL_15,       EMAC_DESC_CTRL_16};
+
+       /* TPD (Transmit Packet Descriptor) */
+       writel(upper_32_bits(adpt->tx_q.tpd.dma_addr),
+              adpt->base + EMAC_DESC_CTRL_1);
+
+       writel(lower_32_bits(adpt->tx_q.tpd.dma_addr),
+              adpt->base + tpd_q_offset[0]);
+
+       writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK,
+              adpt->base + EMAC_DESC_CTRL_9);
+
+       /* RFD (Receive Free Descriptor) & RRD (Receive Return Descriptor) */
+       writel(upper_32_bits(adpt->rx_q.rfd.dma_addr),
+              adpt->base + EMAC_DESC_CTRL_0);
+
+       writel(lower_32_bits(adpt->rx_q.rfd.dma_addr),
+              adpt->base + rfd_q_offset[0]);
+       writel(lower_32_bits(adpt->rx_q.rrd.dma_addr),
+              adpt->base + rrd_q_offset[0]);
+
+       writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK,
+              adpt->base + EMAC_DESC_CTRL_3);
+       writel(adpt->rx_q.rrd.count & RRD_RING_SIZE_BMSK,
+              adpt->base + EMAC_DESC_CTRL_6);
+
+       writel(adpt->rxbuf_size & RX_BUFFER_SIZE_BMSK,
+              adpt->base + EMAC_DESC_CTRL_4);
+
+       writel(0, adpt->base + EMAC_DESC_CTRL_11);
+
+       /* Load all of the base addresses above, and ensure that the write
+        * that triggers the HW to read the ring pointers is flushed
+        */
+       writel(1, adpt->base + EMAC_INTER_SRAM_PART9);
+}
+
+/* Config transmit parameters */
+static void emac_mac_tx_config(struct emac_adapter *adpt)
+{
+       u32 val;
+
+       writel((EMAC_MAX_TX_OFFLOAD_THRESH >> 3) &
+              JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK, adpt->base + EMAC_TXQ_CTRL_1);
+
+       val = (adpt->tpd_burst << NUM_TPD_BURST_PREF_SHFT) &
+              NUM_TPD_BURST_PREF_BMSK;
+
+       val |= TXQ_MODE | LS_8023_SP;
+       val |= (0x0100 << NUM_TXF_BURST_PREF_SHFT) &
+               NUM_TXF_BURST_PREF_BMSK;
+
+       writel(val, adpt->base + EMAC_TXQ_CTRL_0);
+       emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_2,
+                         (TXF_HWM_BMSK | TXF_LWM_BMSK), 0);
+}
+
+/* Config receive parameters */
+static void emac_mac_rx_config(struct emac_adapter *adpt)
+{
+       u32 val;
+
+       val = (adpt->rfd_burst << NUM_RFD_BURST_PREF_SHFT) &
+              NUM_RFD_BURST_PREF_BMSK;
+       val |= (SP_IPV6 | CUT_THRU_EN);
+
+       writel(val, adpt->base + EMAC_RXQ_CTRL_0);
+
+       val = readl(adpt->base + EMAC_RXQ_CTRL_1);
+       val &= ~(JUMBO_1KAH_BMSK | RFD_PREF_LOW_THRESHOLD_BMSK |
+                RFD_PREF_UP_THRESHOLD_BMSK);
+       val |= (JUMBO_1KAH << JUMBO_1KAH_SHFT) |
+               (RFD_PREF_LOW_TH << RFD_PREF_LOW_THRESHOLD_SHFT) |
+               (RFD_PREF_UP_TH  << RFD_PREF_UP_THRESHOLD_SHFT);
+       writel(val, adpt->base + EMAC_RXQ_CTRL_1);
+
+       val = readl(adpt->base + EMAC_RXQ_CTRL_2);
+       val &= ~(RXF_DOF_THRESHOLD_BMSK | RXF_UOF_THRESHOLD_BMSK);
+       val |= (RXF_DOF_THRESFHOLD  << RXF_DOF_THRESHOLD_SHFT) |
+               (RXF_UOF_THRESFHOLD << RXF_UOF_THRESHOLD_SHFT);
+       writel(val, adpt->base + EMAC_RXQ_CTRL_2);
+
+       val = readl(adpt->base + EMAC_RXQ_CTRL_3);
+       val &= ~(RXD_TIMER_BMSK | RXD_THRESHOLD_BMSK);
+       val |= RXD_TH << RXD_THRESHOLD_SHFT;
+       writel(val, adpt->base + EMAC_RXQ_CTRL_3);
+}
+
+/* Config dma */
+static void emac_mac_dma_config(struct emac_adapter *adpt)
+{
+       u32 dma_ctrl = DMAR_REQ_PRI;
+
+       switch (adpt->dma_order) {
+       case emac_dma_ord_in:
+               dma_ctrl |= IN_ORDER_MODE;
+               break;
+       case emac_dma_ord_enh:
+               dma_ctrl |= ENH_ORDER_MODE;
+               break;
+       case emac_dma_ord_out:
+               dma_ctrl |= OUT_ORDER_MODE;
+               break;
+       default:
+               break;
+       }
+
+       dma_ctrl |= (((u32)adpt->dmar_block) << REGRDBLEN_SHFT) &
+                                               REGRDBLEN_BMSK;
+       dma_ctrl |= (((u32)adpt->dmaw_block) << REGWRBLEN_SHFT) &
+                                               REGWRBLEN_BMSK;
+       dma_ctrl |= (((u32)adpt->dmar_dly_cnt) << DMAR_DLY_CNT_SHFT) &
+                                               DMAR_DLY_CNT_BMSK;
+       dma_ctrl |= (((u32)adpt->dmaw_dly_cnt) << DMAW_DLY_CNT_SHFT) &
+                                               DMAW_DLY_CNT_BMSK;
+
+       /* config DMA and ensure that configuration is flushed to HW */
+       writel(dma_ctrl, adpt->base + EMAC_DMA_CTRL);
+}
+
+/* set MAC address */
+static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr)
+{
+       u32 sta;
+
+       /* For example, for MAC address 00-A0-C6-11-22-33:
+        * STA_ADDR0 <--> C6112233, STA_ADDR1 <--> 00A0.
+        */
+
+       /* low 32bit word */
+       sta = (((u32)addr[2]) << 24) | (((u32)addr[3]) << 16) |
+             (((u32)addr[4]) << 8)  | (((u32)addr[5]));
+       writel(sta, adpt->base + EMAC_MAC_STA_ADDR0);
+
+       /* high 32bit word */
+       sta = (((u32)addr[0]) << 8) | (u32)addr[1];
+       writel(sta, adpt->base + EMAC_MAC_STA_ADDR1);
+}
+
+static void emac_mac_config(struct emac_adapter *adpt)
+{
+       struct net_device *netdev = adpt->netdev;
+       unsigned int max_frame;
+       u32 val;
+
+       emac_set_mac_address(adpt, netdev->dev_addr);
+
+       max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       adpt->rxbuf_size = netdev->mtu > EMAC_DEF_RX_BUF_SIZE ?
+               ALIGN(max_frame, 8) : EMAC_DEF_RX_BUF_SIZE;
+
+       emac_mac_dma_rings_config(adpt);
+
+       writel(netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN,
+              adpt->base + EMAC_MAX_FRAM_LEN_CTRL);
+
+       emac_mac_tx_config(adpt);
+       emac_mac_rx_config(adpt);
+       emac_mac_dma_config(adpt);
+
+       val = readl(adpt->base + EMAC_AXI_MAST_CTRL);
+       val &= ~(DATA_BYTE_SWAP | MAX_BOUND);
+       val |= MAX_BTYPE;
+       writel(val, adpt->base + EMAC_AXI_MAST_CTRL);
+       writel(0, adpt->base + EMAC_CLK_GATE_CTRL);
+       writel(RX_UNCPL_INT_EN, adpt->base + EMAC_MISC_CTRL);
+}
+
+void emac_mac_reset(struct emac_adapter *adpt)
+{
+       emac_mac_stop(adpt);
+
+       emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, SOFT_RST);
+       usleep_range(100, 150); /* reset may take up to 100usec */
+
+       /* interrupt clear-on-read */
+       emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, INT_RD_CLR_EN);
+}
+
+void emac_mac_start(struct emac_adapter *adpt)
+{
+       struct phy_device *phydev = adpt->phydev;
+       u32 mac, csr1;
+
+       /* enable tx queue */
+       emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, 0, TXQ_EN);
+
+       /* enable rx queue */
+       emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, 0, RXQ_EN);
+
+       /* enable mac control */
+       mac = readl(adpt->base + EMAC_MAC_CTRL);
+       csr1 = readl(adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
+
+       mac |= TXEN | RXEN;     /* enable RX/TX */
+
+       /* We don't have ethtool support yet, so force flow-control mode
+        * to 'full' always.
+        */
+       mac |= TXFC | RXFC;
+
+       /* setup link speed */
+       mac &= ~SPEED_MASK;
+       if (phydev->speed == SPEED_1000) {
+               mac |= SPEED(2);
+               csr1 |= FREQ_MODE;
+       } else {
+               mac |= SPEED(1);
+               csr1 &= ~FREQ_MODE;
+       }
+
+       if (phydev->duplex == DUPLEX_FULL)
+               mac |= FULLD;
+       else
+               mac &= ~FULLD;
+
+       /* other parameters */
+       mac |= (CRCE | PCRCE);
+       mac |= ((adpt->preamble << PRLEN_SHFT) & PRLEN_BMSK);
+       mac |= BROAD_EN;
+       mac |= FLCHK;
+       mac &= ~RX_CHKSUM_EN;
+       mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL |
+                DEBUG_MODE | SINGLE_PAUSE_MODE);
+
+       writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
+
+       writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL);
+
+       /* enable interrupt read clear, low power sleep mode and
+        * the irq moderators
+        */
+
+       writel_relaxed(adpt->irq_mod, adpt->base + EMAC_IRQ_MOD_TIM_INIT);
+       writel_relaxed(INT_RD_CLR_EN | LPW_MODE | IRQ_MODERATOR_EN |
+                       IRQ_MODERATOR2_EN, adpt->base + EMAC_DMA_MAS_CTRL);
+
+       emac_mac_mode_config(adpt);
+
+       emac_reg_update32(adpt->base + EMAC_ATHR_HEADER_CTRL,
+                         (HEADER_ENABLE | HEADER_CNT_EN), 0);
+
+       emac_reg_update32(adpt->csr + EMAC_EMAC_WRAPPER_CSR2, 0, WOL_EN);
+}
+
+void emac_mac_stop(struct emac_adapter *adpt)
+{
+       emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, RXQ_EN, 0);
+       emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, TXQ_EN, 0);
+       emac_reg_update32(adpt->base + EMAC_MAC_CTRL, TXEN | RXEN, 0);
+       usleep_range(1000, 1050); /* stopping mac may take up to 1msec */
+}
+
+/* Free all descriptors of given transmit queue */
+static void emac_tx_q_descs_free(struct emac_adapter *adpt)
+{
+       struct emac_tx_queue *tx_q = &adpt->tx_q;
+       unsigned int i;
+       size_t size;
+
+       /* ring already cleared, nothing to do */
+       if (!tx_q->tpd.tpbuff)
+               return;
+
+       for (i = 0; i < tx_q->tpd.count; i++) {
+               struct emac_buffer *tpbuf = GET_TPD_BUFFER(tx_q, i);
+
+               if (tpbuf->dma_addr) {
+                       dma_unmap_single(adpt->netdev->dev.parent,
+                                        tpbuf->dma_addr, tpbuf->length,
+                                        DMA_TO_DEVICE);
+                       tpbuf->dma_addr = 0;
+               }
+               if (tpbuf->skb) {
+                       dev_kfree_skb_any(tpbuf->skb);
+                       tpbuf->skb = NULL;
+               }
+       }
+
+       size = sizeof(struct emac_buffer) * tx_q->tpd.count;
+       memset(tx_q->tpd.tpbuff, 0, size);
+
+       /* clear the descriptor ring */
+       memset(tx_q->tpd.v_addr, 0, tx_q->tpd.size);
+
+       tx_q->tpd.consume_idx = 0;
+       tx_q->tpd.produce_idx = 0;
+}
+
+/* Free all descriptors of given receive queue */
+static void emac_rx_q_free_descs(struct emac_adapter *adpt)
+{
+       struct device *dev = adpt->netdev->dev.parent;
+       struct emac_rx_queue *rx_q = &adpt->rx_q;
+       unsigned int i;
+       size_t size;
+
+       /* ring already cleared, nothing to do */
+       if (!rx_q->rfd.rfbuff)
+               return;
+
+       for (i = 0; i < rx_q->rfd.count; i++) {
+               struct emac_buffer *rfbuf = GET_RFD_BUFFER(rx_q, i);
+
+               if (rfbuf->dma_addr) {
+                       dma_unmap_single(dev, rfbuf->dma_addr, rfbuf->length,
+                                        DMA_FROM_DEVICE);
+                       rfbuf->dma_addr = 0;
+               }
+               if (rfbuf->skb) {
+                       dev_kfree_skb(rfbuf->skb);
+                       rfbuf->skb = NULL;
+               }
+       }
+
+       size =  sizeof(struct emac_buffer) * rx_q->rfd.count;
+       memset(rx_q->rfd.rfbuff, 0, size);
+
+       /* clear the descriptor rings */
+       memset(rx_q->rrd.v_addr, 0, rx_q->rrd.size);
+       rx_q->rrd.produce_idx = 0;
+       rx_q->rrd.consume_idx = 0;
+
+       memset(rx_q->rfd.v_addr, 0, rx_q->rfd.size);
+       rx_q->rfd.produce_idx = 0;
+       rx_q->rfd.consume_idx = 0;
+}
+
+/* Free all buffers associated with given transmit queue */
+static void emac_tx_q_bufs_free(struct emac_adapter *adpt)
+{
+       struct emac_tx_queue *tx_q = &adpt->tx_q;
+
+       emac_tx_q_descs_free(adpt);
+
+       kfree(tx_q->tpd.tpbuff);
+       tx_q->tpd.tpbuff = NULL;
+       tx_q->tpd.v_addr = NULL;
+       tx_q->tpd.dma_addr = 0;
+       tx_q->tpd.size = 0;
+}
+
+/* Allocate TX descriptor ring for the given transmit queue */
+static int emac_tx_q_desc_alloc(struct emac_adapter *adpt,
+                               struct emac_tx_queue *tx_q)
+{
+       struct emac_ring_header *ring_header = &adpt->ring_header;
+       size_t size;
+
+       size = sizeof(struct emac_buffer) * tx_q->tpd.count;
+       tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL);
+       if (!tx_q->tpd.tpbuff)
+               return -ENOMEM;
+
+       tx_q->tpd.size = tx_q->tpd.count * (adpt->tpd_size * 4);
+       tx_q->tpd.dma_addr = ring_header->dma_addr + ring_header->used;
+       tx_q->tpd.v_addr = ring_header->v_addr + ring_header->used;
+       ring_header->used += ALIGN(tx_q->tpd.size, 8);
+       tx_q->tpd.produce_idx = 0;
+       tx_q->tpd.consume_idx = 0;
+
+       return 0;
+}
+
+/* Free all buffers associated with given receive queue */
+static void emac_rx_q_bufs_free(struct emac_adapter *adpt)
+{
+       struct emac_rx_queue *rx_q = &adpt->rx_q;
+
+       emac_rx_q_free_descs(adpt);
+
+       kfree(rx_q->rfd.rfbuff);
+       rx_q->rfd.rfbuff   = NULL;
+
+       rx_q->rfd.v_addr   = NULL;
+       rx_q->rfd.dma_addr = 0;
+       rx_q->rfd.size     = 0;
+
+       rx_q->rrd.v_addr   = NULL;
+       rx_q->rrd.dma_addr = 0;
+       rx_q->rrd.size     = 0;
+}
+
+/* Allocate RX descriptor rings for the given receive queue */
+static int emac_rx_descs_alloc(struct emac_adapter *adpt)
+{
+       struct emac_ring_header *ring_header = &adpt->ring_header;
+       struct emac_rx_queue *rx_q = &adpt->rx_q;
+       size_t size;
+
+       size = sizeof(struct emac_buffer) * rx_q->rfd.count;
+       rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL);
+       if (!rx_q->rfd.rfbuff)
+               return -ENOMEM;
+
+       rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4);
+       rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4);
+
+       rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used;
+       rx_q->rrd.v_addr   = ring_header->v_addr + ring_header->used;
+       ring_header->used += ALIGN(rx_q->rrd.size, 8);
+
+       rx_q->rfd.dma_addr = ring_header->dma_addr + ring_header->used;
+       rx_q->rfd.v_addr   = ring_header->v_addr + ring_header->used;
+       ring_header->used += ALIGN(rx_q->rfd.size, 8);
+
+       rx_q->rrd.produce_idx = 0;
+       rx_q->rrd.consume_idx = 0;
+
+       rx_q->rfd.produce_idx = 0;
+       rx_q->rfd.consume_idx = 0;
+
+       return 0;
+}
+
+/* Allocate all TX and RX descriptor rings */
+int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt)
+{
+       struct emac_ring_header *ring_header = &adpt->ring_header;
+       struct device *dev = adpt->netdev->dev.parent;
+       unsigned int num_tx_descs = adpt->tx_desc_cnt;
+       unsigned int num_rx_descs = adpt->rx_desc_cnt;
+       int ret;
+
+       adpt->tx_q.tpd.count = adpt->tx_desc_cnt;
+
+       adpt->rx_q.rrd.count = adpt->rx_desc_cnt;
+       adpt->rx_q.rfd.count = adpt->rx_desc_cnt;
+
+       /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment,
+        * hence the additional padding bytes are allocated.
+        */
+       ring_header->size = num_tx_descs * (adpt->tpd_size * 4) +
+                           num_rx_descs * (adpt->rfd_size * 4) +
+                           num_rx_descs * (adpt->rrd_size * 4) +
+                           8 + 2 * 8; /* 8 bytes each for one Tx and two Rx rings */
+
+       ring_header->used = 0;
+       ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size,
+                                                &ring_header->dma_addr,
+                                                GFP_KERNEL);
+       if (!ring_header->v_addr)
+               return -ENOMEM;
+
+       ring_header->used = ALIGN(ring_header->dma_addr, 8) -
+                                                       ring_header->dma_addr;
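+       /* Worked example (illustrative): if dma_addr ended in 0x4, ALIGN()
+        * would round it up to 0x8, so used = 4 bytes of padding and the
+        * first ring starts 8-byte aligned.
+        */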
+
+       ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q);
+       if (ret) {
+               netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n");
+               goto err_alloc_tx;
+       }
+
+       ret = emac_rx_descs_alloc(adpt);
+       if (ret) {
+               netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n");
+               goto err_alloc_rx;
+       }
+
+       return 0;
+
+err_alloc_rx:
+       emac_tx_q_bufs_free(adpt);
+err_alloc_tx:
+       dma_free_coherent(dev, ring_header->size,
+                         ring_header->v_addr, ring_header->dma_addr);
+
+       ring_header->v_addr   = NULL;
+       ring_header->dma_addr = 0;
+       ring_header->size     = 0;
+       ring_header->used     = 0;
+
+       return ret;
+}
+
+/* Free all TX and RX descriptor rings */
+void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt)
+{
+       struct emac_ring_header *ring_header = &adpt->ring_header;
+       struct device *dev = adpt->netdev->dev.parent;
+
+       emac_tx_q_bufs_free(adpt);
+       emac_rx_q_bufs_free(adpt);
+
+       dma_free_coherent(dev, ring_header->size,
+                         ring_header->v_addr, ring_header->dma_addr);
+
+       ring_header->v_addr   = NULL;
+       ring_header->dma_addr = 0;
+       ring_header->size     = 0;
+       ring_header->used     = 0;
+}
+
+/* Initialize descriptor rings */
+static void emac_mac_rx_tx_ring_reset_all(struct emac_adapter *adpt)
+{
+       unsigned int i;
+
+       adpt->tx_q.tpd.produce_idx = 0;
+       adpt->tx_q.tpd.consume_idx = 0;
+       for (i = 0; i < adpt->tx_q.tpd.count; i++)
+               adpt->tx_q.tpd.tpbuff[i].dma_addr = 0;
+
+       adpt->rx_q.rrd.produce_idx = 0;
+       adpt->rx_q.rrd.consume_idx = 0;
+       adpt->rx_q.rfd.produce_idx = 0;
+       adpt->rx_q.rfd.consume_idx = 0;
+       for (i = 0; i < adpt->rx_q.rfd.count; i++)
+               adpt->rx_q.rfd.rfbuff[i].dma_addr = 0;
+}
+
+/* Produce new receive free descriptor */
+static void emac_mac_rx_rfd_create(struct emac_adapter *adpt,
+                                  struct emac_rx_queue *rx_q,
+                                  dma_addr_t addr)
+{
+       u32 *hw_rfd = EMAC_RFD(rx_q, adpt->rfd_size, rx_q->rfd.produce_idx);
+
+       *(hw_rfd++) = lower_32_bits(addr);
+       *hw_rfd = upper_32_bits(addr);
+
+       if (++rx_q->rfd.produce_idx == rx_q->rfd.count)
+               rx_q->rfd.produce_idx = 0;
+}
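+
+/* For illustration: given addr = 0x0000000112340000, the two words written
+ * above are lower_32_bits(addr) = 0x12340000 and upper_32_bits(addr) =
+ * 0x00000001; produce_idx wraps back to zero once it reaches rfd.count.
+ */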
+
+/* Fill up the receive queue's RFD ring with newly allocated receive buffers */
+static void emac_mac_rx_descs_refill(struct emac_adapter *adpt,
+                                   struct emac_rx_queue *rx_q)
+{
+       struct emac_buffer *curr_rxbuf;
+       struct emac_buffer *next_rxbuf;
+       unsigned int count = 0;
+       u32 next_produce_idx;
+
+       next_produce_idx = rx_q->rfd.produce_idx + 1;
+       if (next_produce_idx == rx_q->rfd.count)
+               next_produce_idx = 0;
+
+       curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx);
+       next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx);
+
+       /* the ring always keeps one blank rx_buffer slot */
+       while (!next_rxbuf->dma_addr) {
+               struct sk_buff *skb;
+               int ret;
+
+               skb = netdev_alloc_skb_ip_align(adpt->netdev, adpt->rxbuf_size);
+               if (!skb)
+                       break;
+
+               curr_rxbuf->dma_addr =
+                       dma_map_single(adpt->netdev->dev.parent, skb->data,
+                                      curr_rxbuf->length, DMA_FROM_DEVICE);
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       curr_rxbuf->dma_addr);
+               if (ret) {
+                       dev_kfree_skb(skb);
+                       break;
+               }
+               curr_rxbuf->skb = skb;
+               curr_rxbuf->length = adpt->rxbuf_size;
+
+               emac_mac_rx_rfd_create(adpt, rx_q, curr_rxbuf->dma_addr);
+               next_produce_idx = rx_q->rfd.produce_idx + 1;
+               if (next_produce_idx == rx_q->rfd.count)
+                       next_produce_idx = 0;
+
+               curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx);
+               next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx);
+               count++;
+       }
+
+       if (count) {
+               u32 prod_idx = (rx_q->rfd.produce_idx << rx_q->produce_shift) &
+                               rx_q->produce_mask;
+               emac_reg_update32(adpt->base + rx_q->produce_reg,
+                                 rx_q->produce_mask, prod_idx);
+       }
+}
+
+static void emac_adjust_link(struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+       struct phy_device *phydev = netdev->phydev;
+
+       if (phydev->link)
+               emac_mac_start(adpt);
+       else
+               emac_mac_stop(adpt);
+
+       phy_print_status(phydev);
+}
+
+/* Bringup the interface/HW */
+int emac_mac_up(struct emac_adapter *adpt)
+{
+       struct net_device *netdev = adpt->netdev;
+       struct emac_irq *irq = &adpt->irq;
+       int ret;
+
+       emac_mac_rx_tx_ring_reset_all(adpt);
+       emac_mac_config(adpt);
+
+       ret = request_irq(irq->irq, emac_isr, 0, EMAC_MAC_IRQ_RES, irq);
+       if (ret) {
+               netdev_err(adpt->netdev, "could not request %s irq\n",
+                          EMAC_MAC_IRQ_RES);
+               return ret;
+       }
+
+       emac_mac_rx_descs_refill(adpt, &adpt->rx_q);
+
+       ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link,
+                                PHY_INTERFACE_MODE_SGMII);
+       if (ret) {
+               netdev_err(adpt->netdev, "could not connect phy\n");
+               free_irq(irq->irq, irq);
+               return ret;
+       }
+
+       /* enable mac irq */
+       writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS);
+       writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
+
+       adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
+       phy_start(adpt->phydev);
+
+       napi_enable(&adpt->rx_q.napi);
+       netif_start_queue(netdev);
+
+       return 0;
+}
+
+/* Bring down the interface/HW */
+void emac_mac_down(struct emac_adapter *adpt)
+{
+       struct net_device *netdev = adpt->netdev;
+
+       netif_stop_queue(netdev);
+       napi_disable(&adpt->rx_q.napi);
+
+       phy_stop(adpt->phydev);
+       phy_disconnect(adpt->phydev);
+
+       /* disable mac irq */
+       writel(DIS_INT, adpt->base + EMAC_INT_STATUS);
+       writel(0, adpt->base + EMAC_INT_MASK);
+       synchronize_irq(adpt->irq.irq);
+       free_irq(adpt->irq.irq, &adpt->irq);
+
+       emac_mac_reset(adpt);
+
+       emac_tx_q_descs_free(adpt);
+       netdev_reset_queue(adpt->netdev);
+       emac_rx_q_free_descs(adpt);
+}
+
+/* Consume next received packet descriptor */
+static bool emac_rx_process_rrd(struct emac_adapter *adpt,
+                               struct emac_rx_queue *rx_q,
+                               struct emac_rrd *rrd)
+{
+       u32 *hw_rrd = EMAC_RRD(rx_q, adpt->rrd_size, rx_q->rrd.consume_idx);
+
+       rrd->word[3] = *(hw_rrd + 3);
+
+       if (!RRD_UPDT(rrd))
+               return false;
+
+       rrd->word[4] = 0;
+       rrd->word[5] = 0;
+
+       rrd->word[0] = *(hw_rrd++);
+       rrd->word[1] = *(hw_rrd++);
+       rrd->word[2] = *(hw_rrd++);
+
+       if (unlikely(RRD_NOR(rrd) != 1)) {
+               netdev_err(adpt->netdev,
+                          "error: multi-RFD not supported yet! nor:%lu\n",
+                          RRD_NOR(rrd));
+       }
+
+       /* mark rrd as processed */
+       RRD_UPDT_SET(rrd, 0);
+       *hw_rrd = rrd->word[3];
+
+       if (++rx_q->rrd.consume_idx == rx_q->rrd.count)
+               rx_q->rrd.consume_idx = 0;
+
+       return true;
+}
+
+/* Produce new transmit descriptor */
+static void emac_tx_tpd_create(struct emac_adapter *adpt,
+                              struct emac_tx_queue *tx_q, struct emac_tpd *tpd)
+{
+       u32 *hw_tpd;
+
+       tx_q->tpd.last_produce_idx = tx_q->tpd.produce_idx;
+       hw_tpd = EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.produce_idx);
+
+       if (++tx_q->tpd.produce_idx == tx_q->tpd.count)
+               tx_q->tpd.produce_idx = 0;
+
+       *(hw_tpd++) = tpd->word[0];
+       *(hw_tpd++) = tpd->word[1];
+       *(hw_tpd++) = tpd->word[2];
+       *hw_tpd = tpd->word[3];
+}
+
+/* Mark the last transmit descriptor as such (for the transmit packet) */
+static void emac_tx_tpd_mark_last(struct emac_adapter *adpt,
+                                 struct emac_tx_queue *tx_q)
+{
+       u32 *hw_tpd =
+               EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.last_produce_idx);
+       u32 tmp_tpd;
+
+       tmp_tpd = *(hw_tpd + 1);
+       tmp_tpd |= EMAC_TPD_LAST_FRAGMENT;
+       *(hw_tpd + 1) = tmp_tpd;
+}
+
+static void emac_rx_rfd_clean(struct emac_rx_queue *rx_q, struct emac_rrd *rrd)
+{
+       struct emac_buffer *rfbuf = rx_q->rfd.rfbuff;
+       u32 consume_idx = RRD_SI(rrd);
+       unsigned int i;
+
+       for (i = 0; i < RRD_NOR(rrd); i++) {
+               rfbuf[consume_idx].skb = NULL;
+               if (++consume_idx == rx_q->rfd.count)
+                       consume_idx = 0;
+       }
+
+       rx_q->rfd.consume_idx = consume_idx;
+       rx_q->rfd.process_idx = consume_idx;
+}
+
+/* Push the received skb to upper layers */
+static void emac_receive_skb(struct emac_rx_queue *rx_q,
+                            struct sk_buff *skb,
+                            u16 vlan_tag, bool vlan_flag)
+{
+       if (vlan_flag) {
+               u16 vlan;
+
+               EMAC_TAG_TO_VLAN(vlan_tag, vlan);
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
+       }
+
+       napi_gro_receive(&rx_q->napi, skb);
+}
+
+/* Process receive event */
+void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q,
+                        int *num_pkts, int max_pkts)
+{
+       u32 proc_idx, hw_consume_idx, num_consume_pkts;
+       struct net_device *netdev  = adpt->netdev;
+       struct emac_buffer *rfbuf;
+       unsigned int count = 0;
+       struct emac_rrd rrd;
+       struct sk_buff *skb;
+       u32 reg;
+
+       reg = readl_relaxed(adpt->base + rx_q->consume_reg);
+
+       hw_consume_idx = (reg & rx_q->consume_mask) >> rx_q->consume_shift;
+       num_consume_pkts = (hw_consume_idx >= rx_q->rrd.consume_idx) ?
+               (hw_consume_idx -  rx_q->rrd.consume_idx) :
+               (hw_consume_idx + rx_q->rrd.count - rx_q->rrd.consume_idx);
+
+       do {
+               if (!num_consume_pkts)
+                       break;
+
+               if (!emac_rx_process_rrd(adpt, rx_q, &rrd))
+                       break;
+
+               if (likely(RRD_NOR(&rrd) == 1)) {
+                       /* good receive */
+                       rfbuf = GET_RFD_BUFFER(rx_q, RRD_SI(&rrd));
+                       dma_unmap_single(adpt->netdev->dev.parent,
+                                        rfbuf->dma_addr, rfbuf->length,
+                                        DMA_FROM_DEVICE);
+                       rfbuf->dma_addr = 0;
+                       skb = rfbuf->skb;
+               } else {
+                       netdev_err(adpt->netdev,
+                                  "error: multi-RFD not supported yet!\n");
+                       break;
+               }
+               emac_rx_rfd_clean(rx_q, &rrd);
+               num_consume_pkts--;
+               count++;
+
+               /* Due to a HW issue in L4 check sum detection (UDP/TCP frags
+                * with DF set are marked as error), drop packets based on the
+                * error mask rather than the summary bit (ignoring L4F errors)
+                */
+               if (rrd.word[EMAC_RRD_STATS_DW_IDX] & EMAC_RRD_ERROR) {
+                       netif_dbg(adpt, rx_status, adpt->netdev,
+                                 "Drop error packet[RRD: 0x%x:0x%x:0x%x:0x%x]\n",
+                                 rrd.word[0], rrd.word[1],
+                                 rrd.word[2], rrd.word[3]);
+
+                       dev_kfree_skb(skb);
+                       continue;
+               }
+
+               skb_put(skb, RRD_PKT_SIZE(&rrd) - ETH_FCS_LEN);
+               skb->dev = netdev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+               if (netdev->features & NETIF_F_RXCSUM)
+                       skb->ip_summed = RRD_L4F(&rrd) ?
+                                         CHECKSUM_NONE : CHECKSUM_UNNECESSARY;
+               else
+                       skb_checksum_none_assert(skb);
+
+               emac_receive_skb(rx_q, skb, (u16)RRD_CVALN_TAG(&rrd),
+                                (bool)RRD_CVTAG(&rrd));
+
+               netdev->last_rx = jiffies;
+               (*num_pkts)++;
+       } while (*num_pkts < max_pkts);
+
+       if (count) {
+               proc_idx = (rx_q->rfd.process_idx << rx_q->process_shft) &
+                               rx_q->process_mask;
+               emac_reg_update32(adpt->base + rx_q->process_reg,
+                                 rx_q->process_mask, proc_idx);
+               emac_mac_rx_descs_refill(adpt, rx_q);
+       }
+}
+
+/* get the number of free transmit descriptors */
+static unsigned int emac_tpd_num_free_descs(struct emac_tx_queue *tx_q)
+{
+       u32 produce_idx = tx_q->tpd.produce_idx;
+       u32 consume_idx = tx_q->tpd.consume_idx;
+
+       return (consume_idx > produce_idx) ?
+               (consume_idx - produce_idx - 1) :
+               (tx_q->tpd.count + consume_idx - produce_idx - 1);
+}
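+
+/* Worked example (illustrative): with count = 512, produce_idx = 10 and
+ * consume_idx = 5, the free count is 512 + 5 - 10 - 1 = 506; one slot is
+ * always left empty so a full ring can be told apart from an empty one.
+ */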
+
+/* Process transmit event */
+void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
+{
+       u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg);
+       u32 hw_consume_idx, pkts_compl = 0, bytes_compl = 0;
+       struct emac_buffer *tpbuf;
+
+       hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift;
+
+       while (tx_q->tpd.consume_idx != hw_consume_idx) {
+               tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
+               if (tpbuf->dma_addr) {
+                       dma_unmap_single(adpt->netdev->dev.parent,
+                                        tpbuf->dma_addr, tpbuf->length,
+                                        DMA_TO_DEVICE);
+                       tpbuf->dma_addr = 0;
+               }
+
+               if (tpbuf->skb) {
+                       pkts_compl++;
+                       bytes_compl += tpbuf->skb->len;
+                       dev_kfree_skb_irq(tpbuf->skb);
+                       tpbuf->skb = NULL;
+               }
+
+               if (++tx_q->tpd.consume_idx == tx_q->tpd.count)
+                       tx_q->tpd.consume_idx = 0;
+       }
+
+       netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl);
+
+       if (netif_queue_stopped(adpt->netdev))
+               if (emac_tpd_num_free_descs(tx_q) > (MAX_SKB_FRAGS + 1))
+                       netif_wake_queue(adpt->netdev);
+}
+
+/* Initialize all queue data structures */
+void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev,
+                                 struct emac_adapter *adpt)
+{
+       adpt->rx_q.netdev = adpt->netdev;
+
+       adpt->rx_q.produce_reg  = EMAC_MAILBOX_0;
+       adpt->rx_q.produce_mask = RFD0_PROD_IDX_BMSK;
+       adpt->rx_q.produce_shift = RFD0_PROD_IDX_SHFT;
+
+       adpt->rx_q.process_reg  = EMAC_MAILBOX_0;
+       adpt->rx_q.process_mask = RFD0_PROC_IDX_BMSK;
+       adpt->rx_q.process_shft = RFD0_PROC_IDX_SHFT;
+
+       adpt->rx_q.consume_reg  = EMAC_MAILBOX_3;
+       adpt->rx_q.consume_mask = RFD0_CONS_IDX_BMSK;
+       adpt->rx_q.consume_shift = RFD0_CONS_IDX_SHFT;
+
+       adpt->rx_q.irq          = &adpt->irq;
+       adpt->rx_q.intr         = adpt->irq.mask & ISR_RX_PKT;
+
+       adpt->tx_q.produce_reg  = EMAC_MAILBOX_15;
+       adpt->tx_q.produce_mask = NTPD_PROD_IDX_BMSK;
+       adpt->tx_q.produce_shift = NTPD_PROD_IDX_SHFT;
+
+       adpt->tx_q.consume_reg  = EMAC_MAILBOX_2;
+       adpt->tx_q.consume_mask = NTPD_CONS_IDX_BMSK;
+       adpt->tx_q.consume_shift = NTPD_CONS_IDX_SHFT;
+}
+
+/* Fill up transmit descriptors with TSO and Checksum offload information */
+static int emac_tso_csum(struct emac_adapter *adpt,
+                        struct emac_tx_queue *tx_q,
+                        struct sk_buff *skb,
+                        struct emac_tpd *tpd)
+{
+       unsigned int hdr_len;
+       int ret;
+
+       if (skb_is_gso(skb)) {
+               if (skb_header_cloned(skb)) {
+                       ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+                       if (unlikely(ret))
+                               return ret;
+               }
+
+               if (skb->protocol == htons(ETH_P_IP)) {
+                       u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data)
+                                      + ntohs(ip_hdr(skb)->tot_len);
+                       if (skb->len > pkt_len)
+                               pskb_trim(skb, pkt_len);
+               }
+
+               hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+               if (unlikely(skb->len == hdr_len)) {
+                       /* we only need to do csum */
+                       netif_warn(adpt, tx_err, adpt->netdev,
+                                  "tso not needed for packet with 0 data\n");
+                       goto do_csum;
+               }
+
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+                       ip_hdr(skb)->check = 0;
+                       tcp_hdr(skb)->check =
+                               ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                                  ip_hdr(skb)->daddr,
+                                                  0, IPPROTO_TCP, 0);
+                       TPD_IPV4_SET(tpd, 1);
+               }
+
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+                       /* IPv6 TSO needs an extra tpd */
+                       struct emac_tpd extra_tpd;
+
+                       memset(tpd, 0, sizeof(*tpd));
+                       memset(&extra_tpd, 0, sizeof(extra_tpd));
+
+                       ipv6_hdr(skb)->payload_len = 0;
+                       tcp_hdr(skb)->check =
+                               ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                &ipv6_hdr(skb)->daddr,
+                                                0, IPPROTO_TCP, 0);
+                       TPD_PKT_LEN_SET(&extra_tpd, skb->len);
+                       TPD_LSO_SET(&extra_tpd, 1);
+                       TPD_LSOV_SET(&extra_tpd, 1);
+                       emac_tx_tpd_create(adpt, tx_q, &extra_tpd);
+                       TPD_LSOV_SET(tpd, 1);
+               }
+
+               TPD_LSO_SET(tpd, 1);
+               TPD_TCPHDR_OFFSET_SET(tpd, skb_transport_offset(skb));
+               TPD_MSS_SET(tpd, skb_shinfo(skb)->gso_size);
+               return 0;
+       }
+
+do_csum:
+       if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+               unsigned int css, cso;
+
+               cso = skb_transport_offset(skb);
+               if (unlikely(cso & 0x1)) {
+                       netdev_err(adpt->netdev,
+                                  "error: payload offset should be even\n");
+                       return -EINVAL;
+               }
+               css = cso + skb->csum_offset;
+
+               TPD_PAYLOAD_OFFSET_SET(tpd, cso >> 1);
+               TPD_CXSUM_OFFSET_SET(tpd, css >> 1);
+               TPD_CSX_SET(tpd, 1);
+       }
+
+       return 0;
+}
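+
+/* Worked example (illustrative) for the do_csum path above: for a TCP/IPv4
+ * frame with a 14-byte Ethernet header and a 20-byte IP header,
+ * cso = skb_transport_offset(skb) = 34 and css = 34 + 16 = 50 (16 being
+ * the offset of the TCP checksum field), so the TPD carries the word
+ * offsets 34 >> 1 = 17 and 50 >> 1 = 25.
+ */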
+
+/* Fill up transmit descriptors */
+static void emac_tx_fill_tpd(struct emac_adapter *adpt,
+                            struct emac_tx_queue *tx_q, struct sk_buff *skb,
+                            struct emac_tpd *tpd)
+{
+       unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+       unsigned int first = tx_q->tpd.produce_idx;
+       unsigned int len = skb_headlen(skb);
+       struct emac_buffer *tpbuf = NULL;
+       unsigned int mapped_len = 0;
+       unsigned int i;
+       int count = 0;
+       int ret;
+
+       /* if Large Send Offload (LSO/TSO) is enabled, map the headers first */
+       if (TPD_LSO(tpd)) {
+               mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+               tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+               tpbuf->length = mapped_len;
+               tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
+                                                skb->data, tpbuf->length,
+                                                DMA_TO_DEVICE);
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       tpbuf->dma_addr);
+               if (ret)
+                       goto error;
+
+               TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+               TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+               TPD_BUF_LEN_SET(tpd, tpbuf->length);
+               emac_tx_tpd_create(adpt, tx_q, tpd);
+               count++;
+       }
+
+       if (mapped_len < len) {
+               tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+               tpbuf->length = len - mapped_len;
+               tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
+                                                skb->data + mapped_len,
+                                                tpbuf->length, DMA_TO_DEVICE);
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       tpbuf->dma_addr);
+               if (ret)
+                       goto error;
+
+               TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+               TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+               TPD_BUF_LEN_SET(tpd, tpbuf->length);
+               emac_tx_tpd_create(adpt, tx_q, tpd);
+               count++;
+       }
+
+       for (i = 0; i < nr_frags; i++) {
+               struct skb_frag_struct *frag;
+
+               frag = &skb_shinfo(skb)->frags[i];
+
+               tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+               tpbuf->length = frag->size;
+               tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+                                              frag->page.p, frag->page_offset,
+                                              tpbuf->length, DMA_TO_DEVICE);
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       tpbuf->dma_addr);
+               if (ret)
+                       goto error;
+
+               TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+               TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+               TPD_BUF_LEN_SET(tpd, tpbuf->length);
+               emac_tx_tpd_create(adpt, tx_q, tpd);
+               count++;
+       }
+
+       /* The last tpd */
+       wmb();
+       emac_tx_tpd_mark_last(adpt, tx_q);
+
+       /* The last buffer info contains the skb address,
+        * so the skb is freed after its buffer is unmapped
+        */
+       tpbuf->skb = skb;
+
+       return;
+
+error:
+       /* One of the memory mappings failed, so undo everything */
+       tx_q->tpd.produce_idx = first;
+
+       while (count--) {
+               tpbuf = GET_TPD_BUFFER(tx_q, first);
+               dma_unmap_page(adpt->netdev->dev.parent, tpbuf->dma_addr,
+                              tpbuf->length, DMA_TO_DEVICE);
+               tpbuf->dma_addr = 0;
+               tpbuf->length = 0;
+
+               if (++first == tx_q->tpd.count)
+                       first = 0;
+       }
+
+       dev_kfree_skb(skb);
+}
+
+/* Transmit the packet using specified transmit queue */
+int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
+                        struct sk_buff *skb)
+{
+       struct emac_tpd tpd;
+       u32 prod_idx;
+
+       memset(&tpd, 0, sizeof(tpd));
+
+       if (emac_tso_csum(adpt, tx_q, skb, &tpd) != 0) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       if (skb_vlan_tag_present(skb)) {
+               u16 tag;
+
+               EMAC_VLAN_TO_TAG(skb_vlan_tag_get(skb), tag);
+               TPD_CVLAN_TAG_SET(&tpd, tag);
+               TPD_INSTC_SET(&tpd, 1);
+       }
+
+       if (skb_network_offset(skb) != ETH_HLEN)
+               TPD_TYP_SET(&tpd, 1);
+
+       emac_tx_fill_tpd(adpt, tx_q, skb, &tpd);
+
+       netdev_sent_queue(adpt->netdev, skb->len);
+
+       /* Make sure there are enough free descriptors to hold one
+        * maximum-sized SKB.  We need one desc for each fragment,
+        * one for the checksum (emac_tso_csum), one for TSO,
+        * and one for the SKB header.
+        */
+       if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3))
+               netif_stop_queue(adpt->netdev);
+
+       /* update produce idx */
+       prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) &
+                   tx_q->produce_mask;
+       emac_reg_update32(adpt->base + tx_q->produce_reg,
+                         tx_q->produce_mask, prod_idx);
+
+       return NETDEV_TX_OK;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
new file mode 100644 (file)
index 0000000..f3aa24d
--- /dev/null
@@ -0,0 +1,248 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* EMAC DMA HW engine uses three rings:
+ * Tx:
+ *   TPD: Transmit Packet Descriptor ring.
+ * Rx:
+ *   RFD: Receive Free Descriptor ring.
+ *     Ring of descriptors with empty buffers to be filled by Rx HW.
+ *   RRD: Receive Return Descriptor ring.
+ *     Ring of descriptors with buffers filled with received data.
+ */
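+
+/* Rx flow sketch (functions added by this patch in emac-mac.c):
+ * emac_mac_rx_descs_refill() posts empty buffers to the RFD ring, the HW
+ * fills a buffer and returns an RRD, and emac_mac_rx_process() reaps the
+ * RRD, hands the skb up via napi_gro_receive(), and recycles the slot.
+ */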
+
+#ifndef _EMAC_HW_H_
+#define _EMAC_HW_H_
+
+/* EMAC_CSR register offsets */
+#define EMAC_EMAC_WRAPPER_CSR1                                0x000000
+#define EMAC_EMAC_WRAPPER_CSR2                                0x000004
+#define EMAC_EMAC_WRAPPER_TX_TS_LO                            0x000104
+#define EMAC_EMAC_WRAPPER_TX_TS_HI                            0x000108
+#define EMAC_EMAC_WRAPPER_TX_TS_INX                           0x00010c
+
+/* DMA Order Settings */
+enum emac_dma_order {
+       emac_dma_ord_in = 1,
+       emac_dma_ord_enh = 2,
+       emac_dma_ord_out = 4
+};
+
+enum emac_dma_req_block {
+       emac_dma_req_128 = 0,
+       emac_dma_req_256 = 1,
+       emac_dma_req_512 = 2,
+       emac_dma_req_1024 = 3,
+       emac_dma_req_2048 = 4,
+       emac_dma_req_4096 = 5
+};
+
+/* Returns the value of bits lo...hi of val */
+#define BITS_GET(val, lo, hi) ((le32_to_cpu(val) & GENMASK((hi), (lo))) >> lo)
+#define BITS_SET(val, lo, hi, new_val) \
+       val = cpu_to_le32((le32_to_cpu(val) & (~GENMASK((hi), (lo)))) | \
+               (((new_val) << (lo)) & GENMASK((hi), (lo))))
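+
+/* Worked example (illustrative): with val = cpu_to_le32(0x00530000),
+ * BITS_GET(val, 16, 19) computes (0x00530000 & GENMASK(19, 16)) >> 16 = 3,
+ * which is how RRD_NOR() below extracts the number of RFDs.
+ */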
+
+/* RRD (Receive Return Descriptor) */
+struct emac_rrd {
+       u32     word[6];
+
+/* number of RFD */
+#define RRD_NOR(rrd)                   BITS_GET((rrd)->word[0], 16, 19)
+/* start consumer index of rfd-ring */
+#define RRD_SI(rrd)                    BITS_GET((rrd)->word[0], 20, 31)
+/* vlan-tag (CVID, CFI and PRI) */
+#define RRD_CVALN_TAG(rrd)             BITS_GET((rrd)->word[2], 0, 15)
+/* length of the packet */
+#define RRD_PKT_SIZE(rrd)              BITS_GET((rrd)->word[3], 0, 13)
+/* L4(TCP/UDP) checksum failed */
+#define RRD_L4F(rrd)                   BITS_GET((rrd)->word[3], 14, 14)
+/* vlan tagged */
+#define RRD_CVTAG(rrd)                 BITS_GET((rrd)->word[3], 16, 16)
+/* When set, indicates that the descriptor is updated by the IP core.
+ * When cleared, indicates that the descriptor is invalid.
+ */
+#define RRD_UPDT(rrd)                  BITS_GET((rrd)->word[3], 31, 31)
+#define RRD_UPDT_SET(rrd, val)         BITS_SET((rrd)->word[3], 31, 31, val)
+/* timestamp low */
+#define RRD_TS_LOW(rrd)                        BITS_GET((rrd)->word[4], 0, 29)
+/* timestamp high */
+#define RRD_TS_HI(rrd)                 le32_to_cpu((rrd)->word[5])
+};
+
+/* TPD (Transmit Packet Descriptor) */
+struct emac_tpd {
+       u32                             word[4];
+
+/* Number of bytes of the transmit packet. (include 4-byte CRC) */
+#define TPD_BUF_LEN_SET(tpd, val)      BITS_SET((tpd)->word[0], 0, 15, val)
+/* Custom Checksum Offload: When set, ask IP core to offload custom checksum */
+#define TPD_CSX_SET(tpd, val)          BITS_SET((tpd)->word[1], 8, 8, val)
+/* TCP Large Send Offload: When set, ask IP core to do offload TCP Large Send */
+#define TPD_LSO(tpd)                   BITS_GET((tpd)->word[1], 12, 12)
+#define TPD_LSO_SET(tpd, val)          BITS_SET((tpd)->word[1], 12, 12, val)
+/* Large Send Offload Version: When set, indicates this is an LSOv2
+ * (for both IPv4 and IPv6). When cleared, indicates this is an LSOv1
+ * (only for IPv4).
+ */
+#define TPD_LSOV_SET(tpd, val)         BITS_SET((tpd)->word[1], 13, 13, val)
+/* IPv4 packet: When set, indicates this is an IPv4 packet; this bit is only
+ * for LSOv2 format.
+ */
+#define TPD_IPV4_SET(tpd, val)         BITS_SET((tpd)->word[1], 16, 16, val)
+/* 0: Ethernet   frame (DA+SA+TYPE+DATA+CRC)
+ * 1: IEEE 802.3 frame (DA+SA+LEN+DSAP+SSAP+CTL+ORG+TYPE+DATA+CRC)
+ */
+#define TPD_TYP_SET(tpd, val)          BITS_SET((tpd)->word[1], 17, 17, val)
+/* Low-32bit Buffer Address */
+#define TPD_BUFFER_ADDR_L_SET(tpd, val)        ((tpd)->word[2] = cpu_to_le32(val))
+/* CVLAN Tag to be inserted if INS_VLAN_TAG is set, CVLAN TPID based on global
+ * register configuration.
+ */
+#define TPD_CVLAN_TAG_SET(tpd, val)    BITS_SET((tpd)->word[3], 0, 15, val)
+/* Insert CVLAN Tag: When set, ask MAC to insert CVLAN TAG to outgoing packet
+ */
+#define TPD_INSTC_SET(tpd, val)                BITS_SET((tpd)->word[3], 17, 17, val)
+/* High-14bit Buffer Address, So, the 64b-bit address is
+ * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L}
+ */
+#define TPD_BUFFER_ADDR_H_SET(tpd, val)        BITS_SET((tpd)->word[3], 18, 30, val)
+/* Format D. Word offset from the 1st byte of this packet at which to start
+ * calculating the custom checksum.
+ */
+#define TPD_PAYLOAD_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val)
+/* Format D. Word offset from the 1st byte of this packet at which to fill
+ * in the custom checksum
+ */
+#define TPD_CXSUM_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 25, val)
+
+/* Format C. TCP Header offset from the 1st byte of this packet. (byte unit) */
+#define TPD_TCPHDR_OFFSET_SET(tpd, val)        BITS_SET((tpd)->word[1], 0, 7, val)
+/* Format C. MSS (Maximum Segment Size) got from the protocol layer. (byte unit)
+ */
+#define TPD_MSS_SET(tpd, val)          BITS_SET((tpd)->word[1], 18, 30, val)
+/* packet length in ext tpd */
+#define TPD_PKT_LEN_SET(tpd, val)      ((tpd)->word[2] = cpu_to_le32(val))
+};
+
+/* emac_ring_header represents a single, contiguous block of DMA space
+ * mapped for the three descriptor rings (tpd, rfd, rrd)
+ */
+struct emac_ring_header {
+       void                    *v_addr;        /* virtual address */
+       dma_addr_t              dma_addr;       /* dma address */
+       size_t                  size;           /* length in bytes */
+       size_t                  used;
+};
+
+/* emac_buffer is wrapper around a pointer to a socket buffer
+ * so a DMA handle can be stored along with the skb
+ */
+struct emac_buffer {
+       struct sk_buff          *skb;           /* socket buffer */
+       u16                     length;         /* rx buffer length */
+       dma_addr_t              dma_addr;       /* dma address */
+};
+
+/* receive free descriptor (rfd) ring */
+struct emac_rfd_ring {
+       struct emac_buffer      *rfbuff;
+       u32                     *v_addr;        /* virtual address */
+       dma_addr_t              dma_addr;       /* dma address */
+       size_t                  size;           /* length in bytes */
+       unsigned int            count;          /* number of desc in the ring */
+       unsigned int            produce_idx;
+       unsigned int            process_idx;
+       unsigned int            consume_idx;    /* unused */
+};
+
+/* Receive Return Descriptor (RRD) ring */
+struct emac_rrd_ring {
+       u32                     *v_addr;        /* virtual address */
+       dma_addr_t              dma_addr;       /* physical address */
+       size_t                  size;           /* length in bytes */
+       unsigned int            count;          /* number of desc in the ring */
+       unsigned int            produce_idx;    /* unused */
+       unsigned int            consume_idx;
+};
+
+/* Rx queue */
+struct emac_rx_queue {
+       struct net_device       *netdev;        /* netdev ring belongs to */
+       struct emac_rrd_ring    rrd;
+       struct emac_rfd_ring    rfd;
+       struct napi_struct      napi;
+       struct emac_irq         *irq;
+
+       u32                     intr;
+       u32                     produce_mask;
+       u32                     process_mask;
+       u32                     consume_mask;
+
+       u16                     produce_reg;
+       u16                     process_reg;
+       u16                     consume_reg;
+
+       u8                      produce_shift;
+       u8                      process_shft;
+       u8                      consume_shift;
+};
+
+/* Transmit Packet Descriptor (tpd) ring */
+struct emac_tpd_ring {
+       struct emac_buffer      *tpbuff;
+       u32                     *v_addr;        /* virtual address */
+       dma_addr_t              dma_addr;       /* dma address */
+
+       size_t                  size;           /* length in bytes */
+       unsigned int            count;          /* number of desc in the ring */
+       unsigned int            produce_idx;
+       unsigned int            consume_idx;
+       unsigned int            last_produce_idx;
+};
+
+/* Tx queue */
+struct emac_tx_queue {
+       struct emac_tpd_ring    tpd;
+
+       u32                     produce_mask;
+       u32                     consume_mask;
+
+       u16                     max_packets;    /* max packets per interrupt */
+       u16                     produce_reg;
+       u16                     consume_reg;
+
+       u8                      produce_shift;
+       u8                      consume_shift;
+};
+
+struct emac_adapter;
+
+int  emac_mac_up(struct emac_adapter *adpt);
+void emac_mac_down(struct emac_adapter *adpt);
+void emac_mac_reset(struct emac_adapter *adpt);
+void emac_mac_start(struct emac_adapter *adpt);
+void emac_mac_stop(struct emac_adapter *adpt);
+void emac_mac_mode_config(struct emac_adapter *adpt);
+void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q,
+                        int *num_pkts, int max_pkts);
+int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
+                        struct sk_buff *skb);
+void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q);
+void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev,
+                                 struct emac_adapter *adpt);
+int  emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt);
+void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt);
+void emac_mac_multicast_addr_clear(struct emac_adapter *adpt);
+void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr);
+
+#endif /*_EMAC_HW_H_*/
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
new file mode 100644 (file)
index 0000000..c412ba9
--- /dev/null
@@ -0,0 +1,204 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC PHY Controller driver.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/iopoll.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-phy.h"
+#include "emac-sgmii.h"
+
+/* EMAC base register offsets */
+#define EMAC_MDIO_CTRL                                        0x001414
+#define EMAC_PHY_STS                                          0x001418
+#define EMAC_MDIO_EX_CTRL                                     0x001440
+
+/* EMAC_MDIO_CTRL */
+#define MDIO_MODE                                              BIT(30)
+#define MDIO_PR                                                BIT(29)
+#define MDIO_AP_EN                                             BIT(28)
+#define MDIO_BUSY                                              BIT(27)
+#define MDIO_CLK_SEL_BMSK                                    0x7000000
+#define MDIO_CLK_SEL_SHFT                                           24
+#define MDIO_START                                             BIT(23)
+#define SUP_PREAMBLE                                           BIT(22)
+#define MDIO_RD_NWR                                            BIT(21)
+#define MDIO_REG_ADDR_BMSK                                    0x1f0000
+#define MDIO_REG_ADDR_SHFT                                          16
+#define MDIO_DATA_BMSK                                          0xffff
+#define MDIO_DATA_SHFT                                               0
+
+/* EMAC_PHY_STS */
+#define PHY_ADDR_BMSK                                         0x1f0000
+#define PHY_ADDR_SHFT                                               16
+
+#define MDIO_CLK_25_4                                                0
+#define MDIO_CLK_25_28                                               7
+
+#define MDIO_WAIT_TIMES                                           1000
+
+#define EMAC_LINK_SPEED_DEFAULT (\
+               EMAC_LINK_SPEED_10_HALF  |\
+               EMAC_LINK_SPEED_10_FULL  |\
+               EMAC_LINK_SPEED_100_HALF |\
+               EMAC_LINK_SPEED_100_FULL |\
+               EMAC_LINK_SPEED_1GB_FULL)
+
+/**
+ * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
+ * @adpt: the emac adapter
+ *
+ * The autopoll feature takes over the MDIO bus.  In order for
+ * the PHY driver to be able to talk to the PHY over the MDIO
+ * bus, we need to temporarily disable the autopoll feature.
+ */
+static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt)
+{
+       u32 val;
+
+       /* disable autopoll */
+       emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0);
+
+       /* wait for any mdio polling to complete */
+       if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val,
+                               !(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100))
+               return 0;
+
+       /* failed to disable; ensure it is enabled before returning */
+       emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
+
+       return -EBUSY;
+}
+
+/**
+ * emac_phy_mdio_autopoll_enable() - enable mdio autopoll
+ * @adpt: the emac adapter
+ *
+ * The EMAC has the ability to poll the external PHY on the MDIO
+ * bus for link state changes.  This eliminates the need for the
+ * driver to poll the phy.  If the link state does change,
+ * the EMAC issues an interrupt on behalf of the PHY.
+ */
+static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt)
+{
+       emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
+}
+
+static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+       struct emac_adapter *adpt = bus->priv;
+       u32 reg;
+       int ret;
+
+       ret = emac_phy_mdio_autopoll_disable(adpt);
+       if (ret)
+               return ret;
+
+       emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
+                         (addr << PHY_ADDR_SHFT));
+
+       reg = SUP_PREAMBLE |
+             ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) |
+             ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) |
+             MDIO_START | MDIO_RD_NWR;
+
+       writel(reg, adpt->base + EMAC_MDIO_CTRL);
+
+       if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
+                              !(reg & (MDIO_START | MDIO_BUSY)),
+                              100, MDIO_WAIT_TIMES * 100))
+               ret = -EIO;
+       else
+               ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
+
+       emac_phy_mdio_autopoll_enable(adpt);
+
+       return ret;
+}
+
+static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+{
+       struct emac_adapter *adpt = bus->priv;
+       u32 reg;
+       int ret;
+
+       ret = emac_phy_mdio_autopoll_disable(adpt);
+       if (ret)
+               return ret;
+
+       emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
+                         (addr << PHY_ADDR_SHFT));
+
+       reg = SUP_PREAMBLE |
+               ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) |
+               ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) |
+               ((val << MDIO_DATA_SHFT) & MDIO_DATA_BMSK) |
+               MDIO_START;
+
+       writel(reg, adpt->base + EMAC_MDIO_CTRL);
+
+       if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
+                              !(reg & (MDIO_START | MDIO_BUSY)), 100,
+                              MDIO_WAIT_TIMES * 100))
+               ret = -EIO;
+
+       emac_phy_mdio_autopoll_enable(adpt);
+
+       return ret;
+}
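+
+/* Usage note (illustrative, not part of this patch): the PHY core reaches
+ * the hooks above through helpers such as mdiobus_read()/mdiobus_write()
+ * once the bus is registered, e.g.
+ *
+ *     int bmsr = mdiobus_read(adpt->mii_bus, phy_addr, MII_BMSR);
+ *
+ * where phy_addr is a hypothetical PHY address on this bus.
+ */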
+
+/* Configure the MDIO bus and connect the external PHY */
+int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt)
+{
+       struct device_node *np = pdev->dev.of_node;
+       struct device_node *phy_np;
+       struct mii_bus *mii_bus;
+       int ret;
+
+       /* Create the mii_bus object for talking to the MDIO bus */
+       adpt->mii_bus = mii_bus = devm_mdiobus_alloc(&pdev->dev);
+       if (!mii_bus)
+               return -ENOMEM;
+
+       mii_bus->name = "emac-mdio";
+       snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+       mii_bus->read = emac_mdio_read;
+       mii_bus->write = emac_mdio_write;
+       mii_bus->parent = &pdev->dev;
+       mii_bus->priv = adpt;
+
+       ret = of_mdiobus_register(mii_bus, np);
+       if (ret) {
+               dev_err(&pdev->dev, "could not register mdio bus\n");
+               return ret;
+       }
+
+       phy_np = of_parse_phandle(np, "phy-handle", 0);
+       adpt->phydev = of_phy_find_device(phy_np);
+       if (!adpt->phydev) {
+               dev_err(&pdev->dev, "could not find external phy\n");
+               mdiobus_unregister(mii_bus);
+               return -ENODEV;
+       }
+
+       if (adpt->phydev->drv)
+               phy_attached_print(adpt->phydev, NULL);
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h
new file mode 100644 (file)
index 0000000..49f3701
--- /dev/null
@@ -0,0 +1,33 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EMAC_PHY_H_
+#define _EMAC_PHY_H_
+
+struct emac_adapter;
+struct platform_device;
+
+typedef int (*emac_sgmii_initialize)(struct emac_adapter *adpt);
+
+/**
+ * struct emac_phy - internal emac phy
+ * @base: base address
+ * @digital: per-lane digital block
+ * @initialize: initialization function
+ */
+struct emac_phy {
+       void __iomem            *base;
+       void __iomem            *digital;
+       emac_sgmii_initialize   initialize;
+};
+
+int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt);
+
+#endif /* _EMAC_PHY_H_ */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
new file mode 100644 (file)
index 0000000..6ab0a3c
--- /dev/null
@@ -0,0 +1,721 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include <linux/of_device.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-sgmii.h"
+
+/* EMAC_QSERDES register offsets */
+#define EMAC_QSERDES_COM_SYS_CLK_CTRL          0x000000
+#define EMAC_QSERDES_COM_PLL_CNTRL             0x000014
+#define EMAC_QSERDES_COM_PLL_IP_SETI           0x000018
+#define EMAC_QSERDES_COM_PLL_CP_SETI           0x000024
+#define EMAC_QSERDES_COM_PLL_IP_SETP           0x000028
+#define EMAC_QSERDES_COM_PLL_CP_SETP           0x00002c
+#define EMAC_QSERDES_COM_SYSCLK_EN_SEL         0x000038
+#define EMAC_QSERDES_COM_RESETSM_CNTRL         0x000040
+#define EMAC_QSERDES_COM_PLLLOCK_CMP1          0x000044
+#define EMAC_QSERDES_COM_PLLLOCK_CMP2          0x000048
+#define EMAC_QSERDES_COM_PLLLOCK_CMP3          0x00004c
+#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN                0x000050
+#define EMAC_QSERDES_COM_DEC_START1            0x000064
+#define EMAC_QSERDES_COM_DIV_FRAC_START1       0x000098
+#define EMAC_QSERDES_COM_DIV_FRAC_START2       0x00009c
+#define EMAC_QSERDES_COM_DIV_FRAC_START3       0x0000a0
+#define EMAC_QSERDES_COM_DEC_START2            0x0000a4
+#define EMAC_QSERDES_COM_PLL_CRCTRL            0x0000ac
+#define EMAC_QSERDES_COM_RESET_SM              0x0000bc
+#define EMAC_QSERDES_TX_BIST_MODE_LANENO       0x000100
+#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL       0x000108
+#define EMAC_QSERDES_TX_TX_DRV_LVL             0x00010c
+#define EMAC_QSERDES_TX_LANE_MODE              0x000150
+#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN       0x000170
+#define EMAC_QSERDES_RX_CDR_CONTROL            0x000200
+#define EMAC_QSERDES_RX_CDR_CONTROL2           0x000210
+#define EMAC_QSERDES_RX_RX_EQ_GAIN12           0x000230
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_SERDES_START            0x000000
+#define EMAC_SGMII_PHY_CMN_PWR_CTRL            0x000004
+#define EMAC_SGMII_PHY_RX_PWR_CTRL             0x000008
+#define EMAC_SGMII_PHY_TX_PWR_CTRL             0x00000C
+#define EMAC_SGMII_PHY_LANE_CTRL1              0x000018
+#define EMAC_SGMII_PHY_AUTONEG_CFG2            0x000048
+#define EMAC_SGMII_PHY_CDR_CTRL0               0x000058
+#define EMAC_SGMII_PHY_SPEED_CFG1              0x000074
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0           0x000080
+#define EMAC_SGMII_PHY_RESET_CTRL              0x0000a8
+#define EMAC_SGMII_PHY_IRQ_CMD                 0x0000ac
+#define EMAC_SGMII_PHY_INTERRUPT_CLEAR         0x0000b0
+#define EMAC_SGMII_PHY_INTERRUPT_MASK          0x0000b4
+#define EMAC_SGMII_PHY_INTERRUPT_STATUS                0x0000b8
+#define EMAC_SGMII_PHY_RX_CHK_STATUS           0x0000d4
+#define EMAC_SGMII_PHY_AUTONEG0_STATUS         0x0000e0
+#define EMAC_SGMII_PHY_AUTONEG1_STATUS         0x0000e4
+
+/* EMAC_QSERDES_COM_PLL_IP_SETI */
+#define PLL_IPSETI(x)                          ((x) & 0x3f)
+
+/* EMAC_QSERDES_COM_PLL_CP_SETI */
+#define PLL_CPSETI(x)                          ((x) & 0xff)
+
+/* EMAC_QSERDES_COM_PLL_IP_SETP */
+#define PLL_IPSETP(x)                          ((x) & 0x3f)
+
+/* EMAC_QSERDES_COM_PLL_CP_SETP */
+#define PLL_CPSETP(x)                          ((x) & 0x1f)
+
+/* EMAC_QSERDES_COM_PLL_CRCTRL */
+#define PLL_RCTRL(x)                           (((x) & 0xf) << 4)
+#define PLL_CCTRL(x)                           ((x) & 0xf)
+
+/* SGMII v2 PHY registers per lane */
+#define EMAC_SGMII_PHY_LN_OFFSET               0x0400
+
+/* SGMII v2 digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0               0x00C
+#define EMAC_SGMII_LN_DRVR_TAP_EN              0x018
+#define EMAC_SGMII_LN_TX_MARGINING             0x01C
+#define EMAC_SGMII_LN_TX_PRE                   0x020
+#define EMAC_SGMII_LN_TX_POST                  0x024
+#define EMAC_SGMII_LN_TX_BAND_MODE             0x060
+#define EMAC_SGMII_LN_LANE_MODE                        0x064
+#define EMAC_SGMII_LN_PARALLEL_RATE            0x078
+#define EMAC_SGMII_LN_CML_CTRL_MODE0           0x0B8
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0         0x0D0
+#define EMAC_SGMII_LN_VGA_INITVAL              0x134
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0       0x17C
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0       0x188
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG           0x194
+#define EMAC_SGMII_LN_RX_BAND                  0x19C
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0      0x1B8
+#define EMAC_SGMII_LN_RSM_CONFIG               0x1F0
+#define EMAC_SGMII_LN_SIGDET_ENABLES           0x224
+#define EMAC_SGMII_LN_SIGDET_CNTRL             0x228
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL    0x22C
+#define EMAC_SGMII_LN_RX_EN_SIGNAL             0x2A0
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0           0x2AC
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV                0x2BC
+
+/* SGMII v2 digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0                 BIT(7)
+#define UCDR_xO_GAIN_MODE(x)                   ((x) & 0x7f)
+#define UCDR_ENABLE                            BIT(6)
+#define UCDR_SO_SATURATION(x)                  ((x) & 0x3f)
+#define SIGDET_LP_BYP_PS4                      BIT(7)
+#define SIGDET_EN_PS0_TO_PS2                   BIT(6)
+#define EN_ACCOUPLEVCM_SW_MUX                  BIT(5)
+#define EN_ACCOUPLEVCM_SW                      BIT(4)
+#define RX_SYNC_EN                             BIT(3)
+#define RXTERM_HIGHZ_PS5                       BIT(2)
+#define SIGDET_EN_PS3                          BIT(1)
+#define EN_ACCOUPLE_VCM_PS3                    BIT(0)
+#define UFS_MODE                               BIT(5)
+#define TXVAL_VALID_INIT                       BIT(4)
+#define TXVAL_VALID_MUX                                BIT(3)
+#define TXVAL_VALID                            BIT(2)
+#define USB3P1_MODE                            BIT(1)
+#define KR_PCIGEN3_MODE                                BIT(0)
+#define PRE_EN                                 BIT(3)
+#define POST_EN                                        BIT(2)
+#define MAIN_EN_MUX                            BIT(1)
+#define MAIN_EN                                        BIT(0)
+#define TX_MARGINING_MUX                       BIT(6)
+#define TX_MARGINING(x)                                ((x) & 0x3f)
+#define TX_PRE_MUX                             BIT(6)
+#define TX_PRE(x)                              ((x) & 0x3f)
+#define TX_POST_MUX                            BIT(6)
+#define TX_POST(x)                             ((x) & 0x3f)
+#define CML_GEAR_MODE(x)                       (((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)                        ((x) & 7)
+#define MIXER_LOADB_MODE(x)                    (((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)                 ((x) & 3)
+#define VGA_THRESH_DFE(x)                      ((x) & 0x3f)
+#define SIGDET_LP_BYP_PS0_TO_PS2               BIT(5)
+#define SIGDET_LP_BYP_MUX                      BIT(4)
+#define SIGDET_LP_BYP                          BIT(3)
+#define SIGDET_EN_MUX                          BIT(2)
+#define SIGDET_EN                              BIT(1)
+#define SIGDET_FLT_BYP                         BIT(0)
+#define SIGDET_LVL(x)                          (((x) & 0xf) << 4)
+#define SIGDET_BW_CTRL(x)                      ((x) & 0xf)
+#define SIGDET_DEGLITCH_CTRL(x)                        (((x) & 0xf) << 1)
+#define SIGDET_DEGLITCH_BYP                    BIT(0)
+#define INVERT_PCS_RX_CLK                      BIT(7)
+#define PWM_EN                                 BIT(6)
+#define RXBIAS_SEL(x)                          (((x) & 0x3) << 4)
+#define EBDAC_SIGN                             BIT(3)
+#define EDAC_SIGN                              BIT(2)
+#define EN_AUXTAP1SIGN_INVERT                  BIT(1)
+#define EN_DAC_CHOPPING                                BIT(0)
+#define DRVR_LOGIC_CLK_EN                      BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)                  ((x) & 0xf)
+#define PARALLEL_RATE_MODE2(x)                 (((x) & 0x3) << 4)
+#define PARALLEL_RATE_MODE1(x)                 (((x) & 0x3) << 2)
+#define PARALLEL_RATE_MODE0(x)                 ((x) & 0x3)
+#define BAND_MODE2(x)                          (((x) & 0x3) << 4)
+#define BAND_MODE1(x)                          (((x) & 0x3) << 2)
+#define BAND_MODE0(x)                          ((x) & 0x3)
+#define LANE_SYNC_MODE                         BIT(5)
+#define LANE_MODE(x)                           ((x) & 0x1f)
+#define CDR_PD_SEL_MODE0(x)                    (((x) & 0x3) << 5)
+#define EN_DLL_MODE0                           BIT(4)
+#define EN_IQ_DCC_MODE0                                BIT(3)
+#define EN_IQCAL_MODE0                         BIT(2)
+#define EN_QPATH_MODE0                         BIT(1)
+#define EN_EPATH_MODE0                         BIT(0)
+#define FORCE_TSYNC_ACK                                BIT(7)
+#define FORCE_CMN_ACK                          BIT(6)
+#define FORCE_CMN_READY                                BIT(5)
+#define EN_RCLK_DEGLITCH                       BIT(4)
+#define BYPASS_RSM_CDR_RESET                   BIT(3)
+#define BYPASS_RSM_TSYNC                       BIT(2)
+#define BYPASS_RSM_SAMP_CAL                    BIT(1)
+#define BYPASS_RSM_DLL_CAL                     BIT(0)
+
+/* EMAC_QSERDES_COM_SYS_CLK_CTRL */
+#define SYSCLK_CM                              BIT(4)
+#define SYSCLK_AC_COUPLE                       BIT(3)
+
+/* EMAC_QSERDES_COM_PLL_CNTRL */
+#define OCP_EN                                 BIT(5)
+#define PLL_DIV_FFEN                           BIT(2)
+#define PLL_DIV_ORD                            BIT(1)
+
+/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */
+#define SYSCLK_SEL_CMOS                                BIT(3)
+
+/* EMAC_QSERDES_COM_RESETSM_CNTRL */
+#define FRQ_TUNE_MODE                          BIT(4)
+
+/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */
+#define PLLLOCK_CMP_EN                         BIT(0)
+
+/* EMAC_QSERDES_COM_DEC_START1 */
+#define DEC_START1_MUX                         BIT(7)
+#define DEC_START1(x)                          ((x) & 0x7f)
+
+/* EMAC_QSERDES_COM_DIV_FRAC_START1 & EMAC_QSERDES_COM_DIV_FRAC_START2 */
+#define DIV_FRAC_START_MUX                     BIT(7)
+#define DIV_FRAC_START(x)                      ((x) & 0x7f)
+
+/* EMAC_QSERDES_COM_DIV_FRAC_START3 */
+#define DIV_FRAC_START3_MUX                    BIT(4)
+#define DIV_FRAC_START3(x)                     ((x) & 0xf)
+
+/* EMAC_QSERDES_COM_DEC_START2 */
+#define DEC_START2_MUX                         BIT(1)
+#define DEC_START2                             BIT(0)
+
+/* EMAC_QSERDES_COM_RESET_SM */
+#define READY                                  BIT(5)
+
+/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */
+#define TX_EMP_POST1_LVL_MUX                   BIT(5)
+#define TX_EMP_POST1_LVL(x)                    ((x) & 0x1f)
+#define TX_EMP_POST1_LVL_BMSK                  0x1f
+#define TX_EMP_POST1_LVL_SHFT                  0
+
+/* EMAC_QSERDES_TX_TX_DRV_LVL */
+#define TX_DRV_LVL_MUX                         BIT(4)
+#define TX_DRV_LVL(x)                          ((x) & 0xf)
+
+/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */
+#define EMP_EN_MUX                             BIT(1)
+#define EMP_EN                                 BIT(0)
+
+/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */
+#define HBW_PD_EN                              BIT(7)
+#define SECONDORDERENABLE                      BIT(6)
+#define FIRSTORDER_THRESH(x)                   (((x) & 0x7) << 3)
+#define SECONDORDERGAIN(x)                     ((x) & 0x7)
+
+/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */
+#define RX_EQ_GAIN2(x)                         (((x) & 0xf) << 4)
+#define RX_EQ_GAIN1(x)                         ((x) & 0xf)
+
+/* EMAC_SGMII_PHY_SERDES_START */
+#define SERDES_START                           BIT(0)
+
+/* EMAC_SGMII_PHY_CMN_PWR_CTRL */
+#define BIAS_EN                                        BIT(6)
+#define PLL_EN                                 BIT(5)
+#define SYSCLK_EN                              BIT(4)
+#define CLKBUF_L_EN                            BIT(3)
+#define PLL_TXCLK_EN                           BIT(1)
+#define PLL_RXCLK_EN                           BIT(0)
+
+/* EMAC_SGMII_PHY_RX_PWR_CTRL */
+#define L0_RX_SIGDET_EN                                BIT(7)
+#define L0_RX_TERM_MODE(x)                     (((x) & 3) << 4)
+#define L0_RX_I_EN                             BIT(1)
+
+/* EMAC_SGMII_PHY_TX_PWR_CTRL */
+#define L0_TX_EN                               BIT(5)
+#define L0_CLKBUF_EN                           BIT(4)
+#define L0_TRAN_BIAS_EN                                BIT(1)
+
+/* EMAC_SGMII_PHY_LANE_CTRL1 */
+#define L0_RX_EQUALIZE_ENABLE                  BIT(6)
+#define L0_RESET_TSYNC_EN                      BIT(4)
+#define L0_DRV_LVL(x)                          ((x) & 0xf)
+
+/* EMAC_SGMII_PHY_AUTONEG_CFG2 */
+#define FORCE_AN_TX_CFG                                BIT(5)
+#define FORCE_AN_RX_CFG                                BIT(4)
+#define AN_ENABLE                              BIT(0)
+
+/* EMAC_SGMII_PHY_SPEED_CFG1 */
+#define DUPLEX_MODE                            BIT(4)
+#define SPDMODE_1000                           BIT(1)
+#define SPDMODE_100                            BIT(0)
+#define SPDMODE_10                             0
+#define SPDMODE_BMSK                           3
+#define SPDMODE_SHFT                           0
+
+/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */
+#define PWRDN_B                                        BIT(0)
+#define CDR_MAX_CNT(x)                         ((x) & 0xff)
+
+/* EMAC_QSERDES_TX_BIST_MODE_LANENO */
+#define BIST_LANE_NUMBER(x)                    (((x) & 3) << 5)
+#define BISTMODE(x)                            ((x) & 0x1f)
+
+/* EMAC_QSERDES_COM_PLLLOCK_CMPx */
+#define PLLLOCK_CMP(x)                         ((x) & 0xff)
+
+/* EMAC_SGMII_PHY_RESET_CTRL */
+#define PHY_SW_RESET                           BIT(0)
+
+/* EMAC_SGMII_PHY_IRQ_CMD */
+#define IRQ_GLOBAL_CLEAR                       BIT(0)
+
+/* EMAC_SGMII_PHY_INTERRUPT_MASK */
+#define DECODE_CODE_ERR                                BIT(7)
+#define DECODE_DISP_ERR                                BIT(6)
+#define PLL_UNLOCK                             BIT(5)
+#define AN_ILLEGAL_TERM                                BIT(4)
+#define SYNC_FAIL                              BIT(3)
+#define AN_START                               BIT(2)
+#define AN_END                                 BIT(1)
+#define AN_REQUEST                             BIT(0)
+
+#define SGMII_PHY_IRQ_CLR_WAIT_TIME            10
+
+#define SGMII_PHY_INTERRUPT_ERR (\
+       DECODE_CODE_ERR         |\
+       DECODE_DISP_ERR)
+
+#define SGMII_ISR_AN_MASK       (\
+       AN_REQUEST              |\
+       AN_START                |\
+       AN_END                  |\
+       AN_ILLEGAL_TERM         |\
+       PLL_UNLOCK              |\
+       SYNC_FAIL)
+
+#define SGMII_ISR_MASK          (\
+       SGMII_PHY_INTERRUPT_ERR |\
+       SGMII_ISR_AN_MASK)
+
+/* SGMII TX_CONFIG */
+#define TXCFG_LINK                             0x8000
+#define TXCFG_MODE_BMSK                                0x1c00
+#define TXCFG_1000_FULL                                0x1800
+#define TXCFG_100_FULL                         0x1400
+#define TXCFG_100_HALF                         0x0400
+#define TXCFG_10_FULL                          0x1000
+#define TXCFG_10_HALF                          0x0000
+
+#define SERDES_START_WAIT_TIMES                        100
+
+struct emac_reg_write {
+       unsigned int offset;
+       u32 val;
+};
+
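+/* Write an array of {offset, value} pairs to a register block in order;
+ * the tables below encode the SGMII hardware initialization sequences.
+ */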
+static void emac_reg_write_all(void __iomem *base,
+                              const struct emac_reg_write *itr, size_t size)
+{
+       size_t i;
+
+       for (i = 0; i < size; ++itr, ++i)
+               writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = {
+       {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+       {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+       {EMAC_SGMII_PHY_CMN_PWR_CTRL,
+               BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
+       {EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
+       {EMAC_SGMII_PHY_RX_PWR_CTRL,
+               L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
+       {EMAC_SGMII_PHY_CMN_PWR_CTRL,
+               BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
+               PLL_RXCLK_EN},
+       {EMAC_SGMII_PHY_LANE_CTRL1,
+               L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
+};
+
+static const struct emac_reg_write sysclk_refclk_setting[] = {
+       {EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
+       {EMAC_QSERDES_COM_SYS_CLK_CTRL, SYSCLK_CM | SYSCLK_AC_COUPLE},
+};
+
+static const struct emac_reg_write pll_setting[] = {
+       {EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
+       {EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
+       {EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
+       {EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
+       {EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
+       {EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
+       {EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
+       {EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
+       {EMAC_QSERDES_COM_DIV_FRAC_START1,
+               DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
+       {EMAC_QSERDES_COM_DIV_FRAC_START2,
+               DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
+       {EMAC_QSERDES_COM_DIV_FRAC_START3,
+               DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
+       {EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
+       {EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
+       {EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
+       {EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
+       {EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
+};
+
+static const struct emac_reg_write cdr_setting[] = {
+       {EMAC_QSERDES_RX_CDR_CONTROL,
+               SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
+       {EMAC_QSERDES_RX_CDR_CONTROL2,
+               SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
+};
+
+static const struct emac_reg_write tx_rx_setting[] = {
+       {EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
+       {EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
+       {EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
+       {EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
+               TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
+       {EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
+       {EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
+};
+
+static const struct emac_reg_write sgmii_v2_laned[] = {
+       /* CDR Settings */
+       {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+               UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+       {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)},
+       {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+       /* TX/RX Settings */
+       {EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+       {EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+       {EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+       {EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+       {EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+       {EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+       {EMAC_SGMII_LN_CML_CTRL_MODE0,
+               CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+       {EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+               MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+       {EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+       {EMAC_SGMII_LN_SIGDET_ENABLES,
+               SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+       {EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+       {EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+       {EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
+       {EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+               DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+       {EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+       {EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
+       {EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
+       {EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+       {EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
+       {EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = {
+       {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+       {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+       {EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+       {EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+static int emac_sgmii_link_init(struct emac_adapter *adpt)
+{
+       struct phy_device *phydev = adpt->phydev;
+       struct emac_phy *phy = &adpt->phy;
+       u32 val;
+
+       val = readl(phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               val &= ~(FORCE_AN_RX_CFG | FORCE_AN_TX_CFG);
+               val |= AN_ENABLE;
+               writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+       } else {
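+               /* Autonegotiation disabled: program speed and duplex
+                * directly and clear the AN enable bit.
+                */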
+               u32 speed_cfg;
+
+               switch (phydev->speed) {
+               case SPEED_10:
+                       speed_cfg = SPDMODE_10;
+                       break;
+               case SPEED_100:
+                       speed_cfg = SPDMODE_100;
+                       break;
+               case SPEED_1000:
+                       speed_cfg = SPDMODE_1000;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               if (phydev->duplex == DUPLEX_FULL)
+                       speed_cfg |= DUPLEX_MODE;
+
+               val &= ~AN_ENABLE;
+               writel(speed_cfg, phy->base + EMAC_SGMII_PHY_SPEED_CFG1);
+               writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+       }
+
+       return 0;
+}
+
+static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
+{
+       struct emac_phy *phy = &adpt->phy;
+       u32 status;
+
+       writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
+       writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
+       /* Ensure interrupt clear command is written to HW */
+       wmb();
+
+       /* After setting the IRQ_GLOBAL_CLEAR bit, the status clearing must
+        * be confirmed before clearing the bits in other registers.
+        * It takes a few cycles for the hardware to clear the interrupt
+        * status.
+        */
+       if (readl_poll_timeout_atomic(phy->base +
+                                     EMAC_SGMII_PHY_INTERRUPT_STATUS,
+                                     status, !(status & irq_bits), 1,
+                                     SGMII_PHY_IRQ_CLR_WAIT_TIME)) {
+               netdev_err(adpt->netdev,
+                          "error: failed to clear SGMII irq: status:0x%x bits:0x%x\n",
+                          status, irq_bits);
+               return -EIO;
+       }
+
+       /* Finalize clearing procedure */
+       writel_relaxed(0, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
+       writel_relaxed(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
+
+       /* Ensure that clearing procedure finalization is written to HW */
+       wmb();
+
+       return 0;
+}
+
+int emac_sgmii_init_v1(struct emac_adapter *adpt)
+{
+       struct emac_phy *phy = &adpt->phy;
+       unsigned int i;
+       int ret;
+
+       ret = emac_sgmii_link_init(adpt);
+       if (ret)
+               return ret;
+
+       emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1,
+                          ARRAY_SIZE(physical_coding_sublayer_programming_v1));
+       emac_reg_write_all(phy->base, sysclk_refclk_setting,
+                          ARRAY_SIZE(sysclk_refclk_setting));
+       emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
+       emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
+       emac_reg_write_all(phy->base, tx_rx_setting,
+                          ARRAY_SIZE(tx_rx_setting));
+
+       /* Power up the Ser/Des engine */
+       writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
+
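+       /* Poll the reset state machine until it reports READY */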
+       for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+               if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
+                       break;
+               usleep_range(100, 200);
+       }
+
+       if (i == SERDES_START_WAIT_TIMES) {
+               netdev_err(adpt->netdev, "error: ser/des failed to start\n");
+               return -EIO;
+       }
+
+       /* Mask out all SGMII interrupts */
+       writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+       emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
+       return 0;
+}
+
+int emac_sgmii_init_v2(struct emac_adapter *adpt)
+{
+       struct emac_phy *phy = &adpt->phy;
+       void __iomem *phy_regs = phy->base;
+       void __iomem *laned = phy->digital;
+       unsigned int i;
+       u32 lnstatus;
+       int ret;
+
+       ret = emac_sgmii_link_init(adpt);
+       if (ret)
+               return ret;
+
+       /* PCS lane-x init */
+       emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2,
+                          ARRAY_SIZE(physical_coding_sublayer_programming_v2));
+
+       /* SGMII lane-x init */
+       emac_reg_write_all(phy->digital,
+                          sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned));
+
+       /* Power up PCS and start reset lane state machine */
+       writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+       writel(1, laned + SGMII_LN_RSM_START);
+
+       /* Wait for c_ready assertion */
+       for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+               lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+               if (lnstatus & BIT(1))
+                       break;
+               usleep_range(100, 200);
+       }
+
+       if (i == SERDES_START_WAIT_TIMES) {
+               netdev_err(adpt->netdev, "SGMII failed to start\n");
+               return -EIO;
+       }
+
+       /* Disable digital and SERDES loopback */
+       writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+       writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+       writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+       /* Mask out all SGMII interrupts */
+       writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+       emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
+       return 0;
+}
+
+static void emac_sgmii_reset_prepare(struct emac_adapter *adpt)
+{
+       struct emac_phy *phy = &adpt->phy;
+       u32 val;
+
+       /* Reset PHY */
+       val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2);
+       writel(((val & ~PHY_RESET) | PHY_RESET), phy->base +
+              EMAC_EMAC_WRAPPER_CSR2);
+       /* Ensure phy-reset command is written to HW before the release cmd */
+       msleep(50);
+       val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2);
+       writel((val & ~PHY_RESET), phy->base + EMAC_EMAC_WRAPPER_CSR2);
+       /* Ensure phy-reset release command is written to HW before initializing
+        * SGMII
+        */
+       msleep(50);
+}
+
+void emac_sgmii_reset(struct emac_adapter *adpt)
+{
+       int ret;
+
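+       /* Drop the high-speed clock to 19.2 MHz while the SGMII is reset
+        * and reprogrammed, then restore the 125 MHz line rate.
+        */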
+       clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
+       emac_sgmii_reset_prepare(adpt);
+
+       ret = adpt->phy.initialize(adpt);
+       if (ret)
+               netdev_err(adpt->netdev,
+                          "could not reinitialize internal PHY (error=%i)\n",
+                          ret);
+
+       clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
+}
+
+static const struct of_device_id emac_sgmii_dt_match[] = {
+       {
+               .compatible = "qcom,fsm9900-emac-sgmii",
+               .data = emac_sgmii_init_v1,
+       },
+       {
+               .compatible = "qcom,qdf2432-emac-sgmii",
+               .data = emac_sgmii_init_v2,
+       },
+       {}
+};
+
+int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
+{
+       struct platform_device *sgmii_pdev = NULL;
+       struct emac_phy *phy = &adpt->phy;
+       struct resource *res;
+       const struct of_device_id *match;
+       struct device_node *np;
+
+       np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0);
+       if (!np) {
+               dev_err(&pdev->dev, "missing internal-phy property\n");
+               return -ENODEV;
+       }
+
+       sgmii_pdev = of_find_device_by_node(np);
+       if (!sgmii_pdev) {
+               dev_err(&pdev->dev, "invalid internal-phy property\n");
+               return -ENODEV;
+       }
+
+       match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev);
+       if (!match) {
+               dev_err(&pdev->dev, "unrecognized internal phy node\n");
+               return -ENODEV;
+       }
+
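+       /* The matched compatible string selects the version-specific
+        * initialization routine.
+        */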
+       phy->initialize = (emac_sgmii_initialize)match->data;
+
+       /* Base address is the first address */
+       res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0);
+       phy->base = devm_ioremap_resource(&sgmii_pdev->dev, res);
+       if (IS_ERR(phy->base))
+               return PTR_ERR(phy->base);
+
+       /* v2 SGMII has a per-lane digital block, so map it if it exists */
+       res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1);
+       if (res) {
+               phy->digital = devm_ioremap_resource(&sgmii_pdev->dev, res);
+               if (IS_ERR(phy->digital))
+                       return PTR_ERR(phy->digital);
+       }
+
+       return phy->initialize(adpt);
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
new file mode 100644 (file)
index 0000000..ce79212
--- /dev/null
@@ -0,0 +1,24 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EMAC_SGMII_H_
+#define _EMAC_SGMII_H_
+
+struct emac_adapter;
+struct platform_device;
+
+int emac_sgmii_init_v1(struct emac_adapter *adpt);
+int emac_sgmii_init_v2(struct emac_adapter *adpt);
+int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt);
+void emac_sgmii_reset(struct emac_adapter *adpt);
+
+#endif
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
new file mode 100644 (file)
index 0000000..e47d387
--- /dev/null
@@ -0,0 +1,741 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_device.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-phy.h"
+#include "emac-sgmii.h"
+
+#define EMAC_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK |  \
+               NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
+
+#define EMAC_RRD_SIZE                                       4
+/* The RRD size when timestamping is enabled */
+#define EMAC_TS_RRD_SIZE                                    6
+#define EMAC_TPD_SIZE                                       4
+#define EMAC_RFD_SIZE                                       2
+
+#define REG_MAC_RX_STATUS_BIN           EMAC_RXMAC_STATC_REG0
+#define REG_MAC_RX_STATUS_END          EMAC_RXMAC_STATC_REG22
+#define REG_MAC_TX_STATUS_BIN           EMAC_TXMAC_STATC_REG0
+#define REG_MAC_TX_STATUS_END          EMAC_TXMAC_STATC_REG24
+
+#define RXQ0_NUM_RFD_PREF_DEF                               8
+#define TXQ0_NUM_TPD_PREF_DEF                               5
+
+#define EMAC_PREAMBLE_DEF                                   7
+
+#define DMAR_DLY_CNT_DEF                                   15
+#define DMAW_DLY_CNT_DEF                                    4
+
+#define IMR_NORMAL_MASK         (\
+               ISR_ERROR       |\
+               ISR_GPHY_LINK   |\
+               ISR_TX_PKT      |\
+               GPHY_WAKEUP_INT)
+
+#define IMR_EXTENDED_MASK       (\
+               SW_MAN_INT      |\
+               ISR_OVER        |\
+               ISR_ERROR       |\
+               ISR_GPHY_LINK   |\
+               ISR_TX_PKT      |\
+               GPHY_WAKEUP_INT)
+
+#define ISR_TX_PKT      (\
+       TX_PKT_INT      |\
+       TX_PKT_INT1     |\
+       TX_PKT_INT2     |\
+       TX_PKT_INT3)
+
+#define ISR_GPHY_LINK        (\
+       GPHY_LINK_UP_INT     |\
+       GPHY_LINK_DOWN_INT)
+
+#define ISR_OVER        (\
+       RFD0_UR_INT     |\
+       RFD1_UR_INT     |\
+       RFD2_UR_INT     |\
+       RFD3_UR_INT     |\
+       RFD4_UR_INT     |\
+       RXF_OF_INT      |\
+       TXF_UR_INT)
+
+#define ISR_ERROR       (\
+       DMAR_TO_INT     |\
+       DMAW_TO_INT     |\
+       TXQ_TO_INT)
+
+/* in sync with enum emac_clk_id */
+static const char * const emac_clk_name[] = {
+       "axi_clk", "cfg_ahb_clk", "high_speed_clk", "mdio_clk", "tx_clk",
+       "rx_clk", "sys_clk"
+};
+
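+/* Read-modify-write helper: clear the bits in @mask, then set the bits
+ * in @val.
+ */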
+void emac_reg_update32(void __iomem *addr, u32 mask, u32 val)
+{
+       u32 data = readl(addr);
+
+       writel(((data & ~mask) | val), addr);
+}
+
+/* Reinitialize the EMAC: take the MAC down, reset the internal SGMII PHY,
+ * and bring the MAC back up, all under the reset lock.
+ */
+int emac_reinit_locked(struct emac_adapter *adpt)
+{
+       int ret;
+
+       mutex_lock(&adpt->reset_lock);
+
+       emac_mac_down(adpt);
+       emac_sgmii_reset(adpt);
+       ret = emac_mac_up(adpt);
+
+       mutex_unlock(&adpt->reset_lock);
+
+       return ret;
+}
+
+/* NAPI */
+static int emac_napi_rtx(struct napi_struct *napi, int budget)
+{
+       struct emac_rx_queue *rx_q =
+               container_of(napi, struct emac_rx_queue, napi);
+       struct emac_adapter *adpt = netdev_priv(rx_q->netdev);
+       struct emac_irq *irq = rx_q->irq;
+       int work_done = 0;
+
+       emac_mac_rx_process(adpt, rx_q, &work_done, budget);
+
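+       /* If the budget was not exhausted, polling is complete; re-enable
+        * the RX interrupt for this queue.
+        */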
+       if (work_done < budget) {
+               napi_complete(napi);
+
+               irq->mask |= rx_q->intr;
+               writel(irq->mask, adpt->base + EMAC_INT_MASK);
+       }
+
+       return work_done;
+}
+
+/* Transmit the packet */
+static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb);
+}
+
+irqreturn_t emac_isr(int _irq, void *data)
+{
+       struct emac_irq *irq = data;
+       struct emac_adapter *adpt =
+               container_of(irq, struct emac_adapter, irq);
+       struct emac_rx_queue *rx_q = &adpt->rx_q;
+       u32 isr, status;
+
+       /* disable the interrupt */
+       writel(0, adpt->base + EMAC_INT_MASK);
+
+       isr = readl_relaxed(adpt->base + EMAC_INT_STATUS);
+
+       status = isr & irq->mask;
+       if (status == 0)
+               goto exit;
+
+       if (status & ISR_ERROR) {
+               netif_warn(adpt, intr, adpt->netdev,
+                          "warning: error irq status 0x%x\n",
+                          status & ISR_ERROR);
+               /* reset MAC */
+               schedule_work(&adpt->work_thread);
+       }
+
+       /* Schedule NAPI for the receive queue whose interrupt status
+        * bit is set
+        */
+       if (status & rx_q->intr) {
+               if (napi_schedule_prep(&rx_q->napi)) {
+                       irq->mask &= ~rx_q->intr;
+                       __napi_schedule(&rx_q->napi);
+               }
+       }
+
+       if (status & TX_PKT_INT)
+               emac_mac_tx_process(adpt, &adpt->tx_q);
+
+       if (status & ISR_OVER)
+               net_warn_ratelimited("warning: TX/RX overflow\n");
+
+       /* link event */
+       if (status & ISR_GPHY_LINK)
+               phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT));
+
+exit:
+       /* enable the interrupt */
+       writel(irq->mask, adpt->base + EMAC_INT_MASK);
+
+       return IRQ_HANDLED;
+}
+
+/* Configure VLAN tag strip/insert feature */
+static int emac_set_features(struct net_device *netdev,
+                            netdev_features_t features)
+{
+       netdev_features_t changed = features ^ netdev->features;
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       /* We only need to reprogram the hardware if the VLAN tag features
+        * have changed, and if it's already running.
+        */
+       if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX)))
+               return 0;
+
+       if (!netif_running(netdev))
+               return 0;
+
+       /* emac_mac_mode_config() uses netdev->features to configure the EMAC,
+        * so make sure it's set first.
+        */
+       netdev->features = features;
+
+       return emac_reinit_locked(adpt);
+}
+
+/* Configure Multicast and Promiscuous modes */
+static void emac_rx_mode_set(struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+       struct netdev_hw_addr *ha;
+
+       emac_mac_mode_config(adpt);
+
+       /* update multicast address filtering */
+       emac_mac_multicast_addr_clear(adpt);
+       netdev_for_each_mc_addr(ha, netdev)
+               emac_mac_multicast_addr_set(adpt, ha->addr);
+}
+
+/* Change the Maximum Transfer Unit (MTU) */
+static int emac_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) ||
+           (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) {
+               netdev_err(adpt->netdev, "error: invalid MTU setting\n");
+               return -EINVAL;
+       }
+
+       netif_info(adpt, hw, adpt->netdev,
+                  "changing MTU from %d to %d\n", netdev->mtu,
+                  new_mtu);
+       netdev->mtu = new_mtu;
+
+       if (netif_running(netdev))
+               return emac_reinit_locked(adpt);
+
+       return 0;
+}
+
+/* Called when the network interface is made active */
+static int emac_open(struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+       int ret;
+
+       /* allocate rx/tx dma buffer & descriptors */
+       ret = emac_mac_rx_tx_rings_alloc_all(adpt);
+       if (ret) {
+               netdev_err(adpt->netdev, "error allocating rx/tx rings\n");
+               return ret;
+       }
+
+       ret = emac_mac_up(adpt);
+       if (ret) {
+               emac_mac_rx_tx_rings_free_all(adpt);
+               return ret;
+       }
+
+       emac_mac_start(adpt);
+
+       return 0;
+}
+
+/* Called when the network interface is disabled */
+static int emac_close(struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       mutex_lock(&adpt->reset_lock);
+
+       emac_mac_down(adpt);
+       emac_mac_rx_tx_rings_free_all(adpt);
+
+       mutex_unlock(&adpt->reset_lock);
+
+       return 0;
+}
+
+/* Respond to a TX hang */
+static void emac_tx_timeout(struct net_device *netdev)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       schedule_work(&adpt->work_thread);
+}
+
+/* IOCTL support for the interface */
+static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+       if (!netif_running(netdev))
+               return -EINVAL;
+
+       if (!netdev->phydev)
+               return -ENODEV;
+
+       return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+/* Provide network statistics info for the interface */
+static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev,
+                                                 struct rtnl_link_stats64 *net_stats)
+{
+       struct emac_adapter *adpt = netdev_priv(netdev);
+       unsigned int addr = REG_MAC_RX_STATUS_BIN;
+       struct emac_stats *stats = &adpt->stats;
+       u64 *stats_itr = &adpt->stats.rx_ok;
+       u32 val;
+
+       spin_lock(&stats->lock);
+
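+       /* The RX hardware counters occupy consecutive registers and map
+        * one-to-one onto consecutive u64 fields starting at rx_ok, so
+        * accumulate them with a simple pointer walk.
+        */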
+       while (addr <= REG_MAC_RX_STATUS_END) {
+               val = readl_relaxed(adpt->base + addr);
+               *stats_itr += val;
+               stats_itr++;
+               addr += sizeof(u32);
+       }
+
+       /* additional rx status */
+       val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG23);
+       adpt->stats.rx_crc_align += val;
+       val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG24);
+       adpt->stats.rx_jabbers += val;
+
+       /* update tx status */
+       addr = REG_MAC_TX_STATUS_BIN;
+       stats_itr = &adpt->stats.tx_ok;
+
+       while (addr <= REG_MAC_TX_STATUS_END) {
+               val = readl_relaxed(adpt->base + addr);
+               *stats_itr += val;
+               ++stats_itr;
+               addr += sizeof(u32);
+       }
+
+       /* additional tx status */
+       val = readl_relaxed(adpt->base + EMAC_TXMAC_STATC_REG25);
+       adpt->stats.tx_col += val;
+
+       /* return parsed statistics */
+       net_stats->rx_packets = stats->rx_ok;
+       net_stats->tx_packets = stats->tx_ok;
+       net_stats->rx_bytes = stats->rx_byte_cnt;
+       net_stats->tx_bytes = stats->tx_byte_cnt;
+       net_stats->multicast = stats->rx_mcast;
+       net_stats->collisions = stats->tx_1_col + stats->tx_2_col * 2 +
+                               stats->tx_late_col + stats->tx_abort_col;
+
+       net_stats->rx_errors = stats->rx_frag + stats->rx_fcs_err +
+                              stats->rx_len_err + stats->rx_sz_ov +
+                              stats->rx_align_err;
+       net_stats->rx_fifo_errors = stats->rx_rxf_ov;
+       net_stats->rx_length_errors = stats->rx_len_err;
+       net_stats->rx_crc_errors = stats->rx_fcs_err;
+       net_stats->rx_frame_errors = stats->rx_align_err;
+       net_stats->rx_over_errors = stats->rx_rxf_ov;
+       net_stats->rx_missed_errors = stats->rx_rxf_ov;
+
+       net_stats->tx_errors = stats->tx_late_col + stats->tx_abort_col +
+                              stats->tx_underrun + stats->tx_trunc;
+       net_stats->tx_fifo_errors = stats->tx_underrun;
+       net_stats->tx_aborted_errors = stats->tx_abort_col;
+       net_stats->tx_window_errors = stats->tx_late_col;
+
+       spin_unlock(&stats->lock);
+
+       return net_stats;
+}
+
+static const struct net_device_ops emac_netdev_ops = {
+       .ndo_open               = emac_open,
+       .ndo_stop               = emac_close,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_start_xmit         = emac_start_xmit,
+       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_change_mtu         = emac_change_mtu,
+       .ndo_do_ioctl           = emac_ioctl,
+       .ndo_tx_timeout         = emac_tx_timeout,
+       .ndo_get_stats64        = emac_get_stats64,
+       .ndo_set_features       = emac_set_features,
+       .ndo_set_rx_mode        = emac_rx_mode_set,
+};
+
+/* Watchdog task routine, called to reinitialize the EMAC */
+static void emac_work_thread(struct work_struct *work)
+{
+       struct emac_adapter *adpt =
+               container_of(work, struct emac_adapter, work_thread);
+
+       emac_reinit_locked(adpt);
+}
+
+/* Initialize various data structures  */
+static void emac_init_adapter(struct emac_adapter *adpt)
+{
+       u32 reg;
+
+       /* descriptors */
+       adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS;
+       adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS;
+
+       /* dma */
+       adpt->dma_order = emac_dma_ord_out;
+       adpt->dmar_block = emac_dma_req_4096;
+       adpt->dmaw_block = emac_dma_req_128;
+       adpt->dmar_dly_cnt = DMAR_DLY_CNT_DEF;
+       adpt->dmaw_dly_cnt = DMAW_DLY_CNT_DEF;
+       adpt->tpd_burst = TXQ0_NUM_TPD_PREF_DEF;
+       adpt->rfd_burst = RXQ0_NUM_RFD_PREF_DEF;
+
+       /* irq moderator */
+       reg = ((EMAC_DEF_RX_IRQ_MOD >> 1) << IRQ_MODERATOR2_INIT_SHFT) |
+             ((EMAC_DEF_TX_IRQ_MOD >> 1) << IRQ_MODERATOR_INIT_SHFT);
+       adpt->irq_mod = reg;
+
+       /* others */
+       adpt->preamble = EMAC_PREAMBLE_DEF;
+}
+
+/* Get the clocks */
+static int emac_clks_get(struct platform_device *pdev,
+                        struct emac_adapter *adpt)
+{
+       unsigned int i;
+
+       for (i = 0; i < EMAC_CLK_CNT; i++) {
+               struct clk *clk = devm_clk_get(&pdev->dev, emac_clk_name[i]);
+
+               if (IS_ERR(clk)) {
+                       dev_err(&pdev->dev,
+                               "could not claim clock %s (error=%li)\n",
+                               emac_clk_name[i], PTR_ERR(clk));
+
+                       return PTR_ERR(clk);
+               }
+
+               adpt->clk[i] = clk;
+       }
+
+       return 0;
+}
+
+/* Initialize the core clocks (phase 1); the data-path clocks are enabled
+ * later by emac_clks_phase2_init()
+ */
+static int emac_clks_phase1_init(struct platform_device *pdev,
+                                struct emac_adapter *adpt)
+{
+       int ret;
+
+       ret = emac_clks_get(pdev, adpt);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(adpt->clk[EMAC_CLK_AXI]);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]);
+       if (ret)
+               return ret;
+
+       ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
+       if (ret)
+               return ret;
+
+       return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]);
+}
+
+/* Enable clocks; needs emac_clks_phase1_init to be called before */
+static int emac_clks_phase2_init(struct platform_device *pdev,
+                                struct emac_adapter *adpt)
+{
+       int ret;
+
+       ret = clk_set_rate(adpt->clk[EMAC_CLK_TX], 125000000);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(adpt->clk[EMAC_CLK_TX]);
+       if (ret)
+               return ret;
+
+       ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
+       if (ret)
+               return ret;
+
+       ret = clk_set_rate(adpt->clk[EMAC_CLK_MDIO], 25000000);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(adpt->clk[EMAC_CLK_MDIO]);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(adpt->clk[EMAC_CLK_RX]);
+       if (ret)
+               return ret;
+
+       return clk_prepare_enable(adpt->clk[EMAC_CLK_SYS]);
+}
+
+static void emac_clks_teardown(struct emac_adapter *adpt)
+{
+       unsigned int i;
+
+       for (i = 0; i < EMAC_CLK_CNT; i++)
+               clk_disable_unprepare(adpt->clk[i]);
+}
+
+/* Get the resources */
+static int emac_probe_resources(struct platform_device *pdev,
+                               struct emac_adapter *adpt)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct net_device *netdev = adpt->netdev;
+       struct resource *res;
+       const void *maddr;
+       int ret = 0;
+
+       /* get mac address */
+       maddr = of_get_mac_address(node);
+       if (!maddr)
+               eth_hw_addr_random(netdev);
+       else
+               ether_addr_copy(netdev->dev_addr, maddr);
+
+       /* Core 0 interrupt */
+       ret = platform_get_irq(pdev, 0);
+       if (ret < 0) {
+               dev_err(&pdev->dev,
+                       "error: missing core0 irq resource (error=%i)\n", ret);
+               return ret;
+       }
+       adpt->irq.irq = ret;
+
+       /* base register address */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       adpt->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(adpt->base))
+               return PTR_ERR(adpt->base);
+
+       /* CSR register address */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       adpt->csr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(adpt->csr))
+               return PTR_ERR(adpt->csr);
+
+       netdev->base_addr = (unsigned long)adpt->base;
+
+       return 0;
+}
+
+static const struct of_device_id emac_dt_match[] = {
+       {
+               .compatible = "qcom,fsm9900-emac",
+       },
+       {}
+};
+
+static int emac_probe(struct platform_device *pdev)
+{
+       struct net_device *netdev;
+       struct emac_adapter *adpt;
+       struct emac_phy *phy;
+       u16 devid, revid;
+       u32 reg;
+       int ret;
+
+       /* The EMAC itself is capable of 64-bit DMA, so try that first. */
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (ret) {
+               /* Some platforms may restrict the EMAC's address bus to less
+                * than the size of DDR. In this case, we need to try a
+                * smaller mask.  We could try every possible smaller mask,
+                * but that's overkill.  Instead, just fall back to 32-bit,
+                * which should always work.
+                */
+               ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+               if (ret) {
+                       dev_err(&pdev->dev, "could not set DMA mask\n");
+                       return ret;
+               }
+       }
+
+       netdev = alloc_etherdev(sizeof(struct emac_adapter));
+       if (!netdev)
+               return -ENOMEM;
+
+       dev_set_drvdata(&pdev->dev, netdev);
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+
+       adpt = netdev_priv(netdev);
+       adpt->netdev = netdev;
+       adpt->msg_enable = EMAC_MSG_DEFAULT;
+
+       phy = &adpt->phy;
+
+       mutex_init(&adpt->reset_lock);
+       spin_lock_init(&adpt->stats.lock);
+
+       adpt->irq.mask = RX_PKT_INT0 | IMR_NORMAL_MASK;
+
+       ret = emac_probe_resources(pdev, adpt);
+       if (ret)
+               goto err_undo_netdev;
+
+       /* initialize clocks */
+       ret = emac_clks_phase1_init(pdev, adpt);
+       if (ret) {
+               dev_err(&pdev->dev, "could not initialize clocks\n");
+               goto err_undo_netdev;
+       }
+
+       netdev->watchdog_timeo = EMAC_WATCHDOG_TIME;
+       netdev->irq = adpt->irq.irq;
+
+       adpt->rrd_size = EMAC_RRD_SIZE;
+       adpt->tpd_size = EMAC_TPD_SIZE;
+       adpt->rfd_size = EMAC_RFD_SIZE;
+
+       netdev->netdev_ops = &emac_netdev_ops;
+
+       emac_init_adapter(adpt);
+
+       /* init external phy */
+       ret = emac_phy_config(pdev, adpt);
+       if (ret)
+               goto err_undo_clocks;
+
+       /* init internal sgmii phy */
+       ret = emac_sgmii_config(pdev, adpt);
+       if (ret)
+               goto err_undo_mdiobus;
+
+       /* enable clocks */
+       ret = emac_clks_phase2_init(pdev, adpt);
+       if (ret) {
+               dev_err(&pdev->dev, "could not initialize clocks\n");
+               goto err_undo_mdiobus;
+       }
+
+       emac_mac_reset(adpt);
+
+       /* set hw features */
+       netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+                       NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX |
+                       NETIF_F_HW_VLAN_CTAG_TX;
+       netdev->hw_features = netdev->features;
+
+       netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM |
+                                NETIF_F_TSO | NETIF_F_TSO6;
+
+       INIT_WORK(&adpt->work_thread, emac_work_thread);
+
+       /* Initialize queues */
+       emac_mac_rx_tx_ring_init_all(pdev, adpt);
+
+       netif_napi_add(netdev, &adpt->rx_q.napi, emac_napi_rtx,
+                      NAPI_POLL_WEIGHT);
+
+       ret = register_netdev(netdev);
+       if (ret) {
+               dev_err(&pdev->dev, "could not register net device\n");
+               goto err_undo_napi;
+       }
+
+       reg =  readl_relaxed(adpt->base + EMAC_DMA_MAS_CTRL);
+       devid = (reg & DEV_ID_NUM_BMSK)  >> DEV_ID_NUM_SHFT;
+       revid = (reg & DEV_REV_NUM_BMSK) >> DEV_REV_NUM_SHFT;
+       reg = readl_relaxed(adpt->base + EMAC_CORE_HW_VERSION);
+
+       netif_info(adpt, probe, netdev,
+                  "hardware id %d.%d, hardware version %d.%d.%d\n",
+                  devid, revid,
+                  (reg & MAJOR_BMSK) >> MAJOR_SHFT,
+                  (reg & MINOR_BMSK) >> MINOR_SHFT,
+                  (reg & STEP_BMSK)  >> STEP_SHFT);
+
+       return 0;
+
+err_undo_napi:
+       netif_napi_del(&adpt->rx_q.napi);
+err_undo_mdiobus:
+       mdiobus_unregister(adpt->mii_bus);
+err_undo_clocks:
+       emac_clks_teardown(adpt);
+err_undo_netdev:
+       free_netdev(netdev);
+
+       return ret;
+}
+
+static int emac_remove(struct platform_device *pdev)
+{
+       struct net_device *netdev = dev_get_drvdata(&pdev->dev);
+       struct emac_adapter *adpt = netdev_priv(netdev);
+
+       unregister_netdev(netdev);
+       netif_napi_del(&adpt->rx_q.napi);
+
+       emac_clks_teardown(adpt);
+
+       mdiobus_unregister(adpt->mii_bus);
+       free_netdev(netdev);
+
+       return 0;
+}
+
+static struct platform_driver emac_platform_driver = {
+       .probe  = emac_probe,
+       .remove = emac_remove,
+       .driver = {
+               .name           = "qcom-emac",
+               .of_match_table = emac_dt_match,
+       },
+};
+
+module_platform_driver(emac_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:qcom-emac");
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h
new file mode 100644 (file)
index 0000000..0c76e6c
--- /dev/null
@@ -0,0 +1,335 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EMAC_H_
+#define _EMAC_H_
+
+#include <linux/irqreturn.h>
+#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include "emac-mac.h"
+#include "emac-phy.h"
+
+/* EMAC base register offsets */
+#define EMAC_DMA_MAS_CTRL                                     0x001400
+#define EMAC_IRQ_MOD_TIM_INIT                                 0x001408
+#define EMAC_BLK_IDLE_STS                                     0x00140c
+#define EMAC_PHY_LINK_DELAY                                   0x00141c
+#define EMAC_SYS_ALIV_CTRL                                    0x001434
+#define EMAC_MAC_IPGIFG_CTRL                                  0x001484
+#define EMAC_MAC_STA_ADDR0                                    0x001488
+#define EMAC_MAC_STA_ADDR1                                    0x00148c
+#define EMAC_HASH_TAB_REG0                                    0x001490
+#define EMAC_HASH_TAB_REG1                                    0x001494
+#define EMAC_MAC_HALF_DPLX_CTRL                               0x001498
+#define EMAC_MAX_FRAM_LEN_CTRL                                0x00149c
+#define EMAC_INT_STATUS                                       0x001600
+#define EMAC_INT_MASK                                         0x001604
+#define EMAC_RXMAC_STATC_REG0                                 0x001700
+#define EMAC_RXMAC_STATC_REG22                                0x001758
+#define EMAC_TXMAC_STATC_REG0                                 0x001760
+#define EMAC_TXMAC_STATC_REG24                                0x0017c0
+#define EMAC_CORE_HW_VERSION                                  0x001974
+#define EMAC_IDT_TABLE0                                       0x001b00
+#define EMAC_RXMAC_STATC_REG23                                0x001bc8
+#define EMAC_RXMAC_STATC_REG24                                0x001bcc
+#define EMAC_TXMAC_STATC_REG25                                0x001bd0
+#define EMAC_INT1_MASK                                        0x001bf0
+#define EMAC_INT1_STATUS                                      0x001bf4
+#define EMAC_INT2_MASK                                        0x001bf8
+#define EMAC_INT2_STATUS                                      0x001bfc
+#define EMAC_INT3_MASK                                        0x001c00
+#define EMAC_INT3_STATUS                                      0x001c04
+
+/* EMAC_DMA_MAS_CTRL */
+#define DEV_ID_NUM_BMSK                                     0x7f000000
+#define DEV_ID_NUM_SHFT                                             24
+#define DEV_REV_NUM_BMSK                                      0xff0000
+#define DEV_REV_NUM_SHFT                                            16
+#define INT_RD_CLR_EN                                           0x4000
+#define IRQ_MODERATOR2_EN                                        0x800
+#define IRQ_MODERATOR_EN                                         0x400
+#define LPW_CLK_SEL                                               0x80
+#define LPW_STATE                                                 0x20
+#define LPW_MODE                                                  0x10
+#define SOFT_RST                                                   0x1
+
+/* EMAC_IRQ_MOD_TIM_INIT */
+#define IRQ_MODERATOR2_INIT_BMSK                            0xffff0000
+#define IRQ_MODERATOR2_INIT_SHFT                                    16
+#define IRQ_MODERATOR_INIT_BMSK                                 0xffff
+#define IRQ_MODERATOR_INIT_SHFT                                      0
+
+/* EMAC_INT_STATUS */
+#define DIS_INT                                                BIT(31)
+#define PTP_INT                                                BIT(30)
+#define RFD4_UR_INT                                            BIT(29)
+#define TX_PKT_INT3                                            BIT(26)
+#define TX_PKT_INT2                                            BIT(25)
+#define TX_PKT_INT1                                            BIT(24)
+#define RX_PKT_INT3                                            BIT(19)
+#define RX_PKT_INT2                                            BIT(18)
+#define RX_PKT_INT1                                            BIT(17)
+#define RX_PKT_INT0                                            BIT(16)
+#define TX_PKT_INT                                             BIT(15)
+#define TXQ_TO_INT                                             BIT(14)
+#define GPHY_WAKEUP_INT                                        BIT(13)
+#define GPHY_LINK_DOWN_INT                                     BIT(12)
+#define GPHY_LINK_UP_INT                                       BIT(11)
+#define DMAW_TO_INT                                            BIT(10)
+#define DMAR_TO_INT                                             BIT(9)
+#define TXF_UR_INT                                              BIT(8)
+#define RFD3_UR_INT                                             BIT(7)
+#define RFD2_UR_INT                                             BIT(6)
+#define RFD1_UR_INT                                             BIT(5)
+#define RFD0_UR_INT                                             BIT(4)
+#define RXF_OF_INT                                              BIT(3)
+#define SW_MAN_INT                                              BIT(2)
+
+/* EMAC_MAILBOX_6 */
+#define RFD2_PROC_IDX_BMSK                                   0xfff0000
+#define RFD2_PROC_IDX_SHFT                                          16
+#define RFD2_PROD_IDX_BMSK                                       0xfff
+#define RFD2_PROD_IDX_SHFT                                           0
+
+/* EMAC_CORE_HW_VERSION */
+#define MAJOR_BMSK                                          0xf0000000
+#define MAJOR_SHFT                                                  28
+#define MINOR_BMSK                                           0xfff0000
+#define MINOR_SHFT                                                  16
+#define STEP_BMSK                                               0xffff
+#define STEP_SHFT                                                    0
+
+/* EMAC_EMAC_WRAPPER_CSR1 */
+#define TX_INDX_FIFO_SYNC_RST                                  BIT(23)
+#define TX_TS_FIFO_SYNC_RST                                    BIT(22)
+#define RX_TS_FIFO2_SYNC_RST                                   BIT(21)
+#define RX_TS_FIFO1_SYNC_RST                                   BIT(20)
+#define TX_TS_ENABLE                                           BIT(16)
+#define DIS_1588_CLKS                                          BIT(11)
+#define FREQ_MODE                                               BIT(9)
+#define ENABLE_RRD_TIMESTAMP                                    BIT(3)
+
+/* EMAC_EMAC_WRAPPER_CSR2 */
+#define HDRIVE_BMSK                                             0x3000
+#define HDRIVE_SHFT                                                 12
+#define SLB_EN                                                  BIT(9)
+#define PLB_EN                                                  BIT(8)
+#define WOL_EN                                                  BIT(3)
+#define PHY_RESET                                               BIT(0)
+
+#define EMAC_DEV_ID                                             0x0040
+
+/* SGMII v2 per lane registers */
+#define SGMII_LN_RSM_START             0x029C
+
+/* SGMII v2 PHY common registers */
+#define SGMII_PHY_CMN_CTRL            0x0408
+#define SGMII_PHY_CMN_RESET_CTRL      0x0410
+
+/* SGMII v2 PHY registers per lane */
+#define SGMII_PHY_LN_OFFSET          0x0400
+#define SGMII_PHY_LN_LANE_STATUS     0x00DC
+#define SGMII_PHY_LN_BIST_GEN0       0x008C
+#define SGMII_PHY_LN_BIST_GEN1       0x0090
+#define SGMII_PHY_LN_BIST_GEN2       0x0094
+#define SGMII_PHY_LN_BIST_GEN3       0x0098
+#define SGMII_PHY_LN_CDR_CTRL1       0x005C
+
+enum emac_clk_id {
+       EMAC_CLK_AXI,
+       EMAC_CLK_CFG_AHB,
+       EMAC_CLK_HIGH_SPEED,
+       EMAC_CLK_MDIO,
+       EMAC_CLK_TX,
+       EMAC_CLK_RX,
+       EMAC_CLK_SYS,
+       EMAC_CLK_CNT
+};
+
+#define EMAC_LINK_SPEED_UNKNOWN                                    0x0
+#define EMAC_LINK_SPEED_10_HALF                                 BIT(0)
+#define EMAC_LINK_SPEED_10_FULL                                 BIT(1)
+#define EMAC_LINK_SPEED_100_HALF                                BIT(2)
+#define EMAC_LINK_SPEED_100_FULL                                BIT(3)
+#define EMAC_LINK_SPEED_1GB_FULL                                BIT(5)
+
+#define EMAC_MAX_SETUP_LNK_CYCLE                                   100
+
+/* Wake On Lan */
+#define EMAC_WOL_PHY                     0x00000001 /* PHY Status Change */
+#define EMAC_WOL_MAGIC                   0x00000002 /* Magic Packet */
+
+struct emac_stats {
+       /* rx */
+       u64 rx_ok;              /* good packets */
+       u64 rx_bcast;           /* good broadcast packets */
+       u64 rx_mcast;           /* good multicast packets */
+       u64 rx_pause;           /* pause packets */
+       u64 rx_ctrl;            /* control packets other than pause frames */
+       u64 rx_fcs_err;         /* packets with bad FCS */
+       u64 rx_len_err;         /* packets with length mismatch */
+       u64 rx_byte_cnt;        /* good bytes count (without FCS) */
+       u64 rx_runt;            /* runt packets */
+       u64 rx_frag;            /* fragment count */
+       u64 rx_sz_64;           /* packets that are 64 bytes */
+       u64 rx_sz_65_127;       /* packets that are 65-127 bytes */
+       u64 rx_sz_128_255;      /* packets that are 128-255 bytes */
+       u64 rx_sz_256_511;      /* packets that are 256-511 bytes */
+       u64 rx_sz_512_1023;     /* packets that are 512-1023 bytes */
+       u64 rx_sz_1024_1518;    /* packets that are 1024-1518 bytes */
+       u64 rx_sz_1519_max;     /* packets that are 1519-MTU bytes */
+       u64 rx_sz_ov;           /* packets that are >MTU bytes (truncated) */
+       u64 rx_rxf_ov;          /* packets dropped due to RX FIFO overflow */
+       u64 rx_align_err;       /* alignment errors */
+       u64 rx_bcast_byte_cnt;  /* broadcast packets byte count (without FCS) */
+       u64 rx_mcast_byte_cnt;  /* multicast packets byte count (without FCS) */
+       u64 rx_err_addr;        /* packets dropped due to address filtering */
+       u64 rx_crc_align;       /* CRC align errors */
+       u64 rx_jabbers;         /* jabbers */
+
+       /* tx */
+       u64 tx_ok;              /* good packets */
+       u64 tx_bcast;           /* good broadcast packets */
+       u64 tx_mcast;           /* good multicast packets */
+       u64 tx_pause;           /* pause packets */
+       u64 tx_exc_defer;       /* packets with excessive deferral */
+       u64 tx_ctrl;            /* control packets other than pause frames */
+       u64 tx_defer;           /* packets that are deferred */
+       u64 tx_byte_cnt;        /* good bytes count (without FCS) */
+       u64 tx_sz_64;           /* packets that are 64 bytes */
+       u64 tx_sz_65_127;       /* packets that are 65-127 bytes */
+       u64 tx_sz_128_255;      /* packets that are 128-255 bytes */
+       u64 tx_sz_256_511;      /* packets that are 256-511 bytes */
+       u64 tx_sz_512_1023;     /* packets that are 512-1023 bytes */
+       u64 tx_sz_1024_1518;    /* packets that are 1024-1518 bytes */
+       u64 tx_sz_1519_max;     /* packets that are 1519-MTU bytes */
+       u64 tx_1_col;           /* packets with a single prior collision */
+       u64 tx_2_col;           /* packets with multiple prior collisions */
+       u64 tx_late_col;        /* packets with late collisions */
+       u64 tx_abort_col;       /* packets aborted due to excess collisions */
+       u64 tx_underrun;        /* packets aborted due to FIFO underrun */
+       u64 tx_rd_eop;          /* count of reads beyond EOP */
+       u64 tx_len_err;         /* packets with length mismatch */
+       u64 tx_trunc;           /* packets truncated due to size >MTU */
+       u64 tx_bcast_byte;      /* broadcast packets byte count (without FCS) */
+       u64 tx_mcast_byte;      /* multicast packets byte count (without FCS) */
+       u64 tx_col;             /* collisions */
+
+       spinlock_t lock;        /* prevent multiple simultaneous readers */
+};
+
+/* RSS hstype Definitions */
+#define EMAC_RSS_HSTYP_IPV4_EN                             0x00000001
+#define EMAC_RSS_HSTYP_TCP4_EN                             0x00000002
+#define EMAC_RSS_HSTYP_IPV6_EN                             0x00000004
+#define EMAC_RSS_HSTYP_TCP6_EN                             0x00000008
+#define EMAC_RSS_HSTYP_ALL_EN (\
+               EMAC_RSS_HSTYP_IPV4_EN   |\
+               EMAC_RSS_HSTYP_TCP4_EN   |\
+               EMAC_RSS_HSTYP_IPV6_EN   |\
+               EMAC_RSS_HSTYP_TCP6_EN)
+
+#define EMAC_VLAN_TO_TAG(_vlan, _tag) \
+               (_tag =  ((((_vlan) >> 8) & 0xFF) | (((_vlan) & 0xFF) << 8)))
+
+#define EMAC_TAG_TO_VLAN(_tag, _vlan) \
+               (_vlan = ((((_tag) >> 8) & 0xFF) | (((_tag) & 0xFF) << 8)))
+
+#define EMAC_DEF_RX_BUF_SIZE                                     1536
+#define EMAC_MAX_JUMBO_PKT_SIZE                                    (9 * 1024)
+#define EMAC_MAX_TX_OFFLOAD_THRESH                         (9 * 1024)
+
+#define EMAC_MAX_ETH_FRAME_SIZE                       EMAC_MAX_JUMBO_PKT_SIZE
+#define EMAC_MIN_ETH_FRAME_SIZE                                            68
+
+#define EMAC_DEF_TX_QUEUES                                          1
+#define EMAC_DEF_RX_QUEUES                                          1
+
+#define EMAC_MIN_TX_DESCS                                         128
+#define EMAC_MIN_RX_DESCS                                         128
+
+#define EMAC_MAX_TX_DESCS                                       16383
+#define EMAC_MAX_RX_DESCS                                        2047
+
+#define EMAC_DEF_TX_DESCS                                         512
+#define EMAC_DEF_RX_DESCS                                         256
+
+#define EMAC_DEF_RX_IRQ_MOD                                       250
+#define EMAC_DEF_TX_IRQ_MOD                                       250
+
+#define EMAC_WATCHDOG_TIME                                   (5 * HZ)
+
+/* by default check link every 4 seconds */
+#define EMAC_TRY_LINK_TIMEOUT                                (4 * HZ)
+
+/* emac_irq - per-device (per-adapter) IRQ properties.
+ * @irq:       IRQ number.
+ * @mask:      mask to use over the status register.
+ */
+struct emac_irq {
+       unsigned int    irq;
+       u32             mask;
+};
+
+/* The device's main data structure */
+struct emac_adapter {
+       struct net_device               *netdev;
+       struct mii_bus                  *mii_bus;
+       struct phy_device               *phydev;
+
+       void __iomem                    *base;
+       void __iomem                    *csr;
+
+       struct emac_phy                 phy;
+       struct emac_stats               stats;
+
+       struct emac_irq                 irq;
+       struct clk                      *clk[EMAC_CLK_CNT];
+
+       /* All Descriptor memory */
+       struct emac_ring_header         ring_header;
+       struct emac_tx_queue            tx_q;
+       struct emac_rx_queue            rx_q;
+       unsigned int                    tx_desc_cnt;
+       unsigned int                    rx_desc_cnt;
+       unsigned int                    rrd_size; /* in quad words */
+       unsigned int                    rfd_size; /* in quad words */
+       unsigned int                    tpd_size; /* in quad words */
+
+       unsigned int                    rxbuf_size;
+
+       /* Ring parameter */
+       u8                              tpd_burst;
+       u8                              rfd_burst;
+       unsigned int                    dmaw_dly_cnt;
+       unsigned int                    dmar_dly_cnt;
+       enum emac_dma_req_block         dmar_block;
+       enum emac_dma_req_block         dmaw_block;
+       enum emac_dma_order             dma_order;
+
+       u32                             irq_mod;
+       u32                             preamble;
+
+       struct work_struct              work_thread;
+
+       u16                             msg_enable;
+
+       struct mutex                    reset_lock;
+};
+
+int emac_reinit_locked(struct emac_adapter *adpt);
+void emac_reg_update32(void __iomem *addr, u32 mask, u32 val);
+irqreturn_t emac_isr(int irq, void *data);
+
+#endif /* _EMAC_H_ */
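The EMAC_VLAN_TO_TAG()/EMAC_TAG_TO_VLAN() helpers above just swap the two bytes of a 16-bit value, converting between the host-order VLAN ID and the byte-swapped tag layout the hardware uses. A usage sketch (values illustrative):

	u16 vlan = 0x0123, tag, back;

	EMAC_VLAN_TO_TAG(vlan, tag);	/* tag  == 0x2301 */
	EMAC_TAG_TO_VLAN(tag, back);	/* back == 0x0123; the swap is its own inverse */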
index deae10d..5297bf7 100644
@@ -467,8 +467,8 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
        unsigned int rx_tail = cp->rx_tail;
        int rx;
 
-rx_status_loop:
        rx = 0;
+rx_status_loop:
        cpw16(IntrStatus, cp_rx_intr_mask);
 
        while (rx < budget) {
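This 8139cp hunk moves the rx counter initialization out from under the rx_status_loop label: the poll function can jump back to that label when more receive work shows up, and with the old placement each jump re-zeroed the counter, letting a single NAPI poll exceed its budget. The corrected control flow, simplified with hypothetical helpers:

	int rx = 0;			/* accumulates across restarts */
rx_status_loop:
	ack_rx_interrupts();		/* hypothetical; stands in for cpw16() */
	while (rx < budget && rx_ring_has_work())
		rx++;			/* process one descriptor */
	if (rx < budget && rx_ring_has_work())
		goto rx_status_loop;	/* rx is no longer reset by the jump */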
index 4e5d5e9..f110966 100644
@@ -1011,7 +1011,6 @@ struct ravb_private {
        struct work_struct work;
        /* MII transceiver section. */
        struct mii_bus *mii_bus;        /* MDIO bus control */
-       struct phy_device *phydev;      /* PHY device control */
        int link;
        phy_interface_t phy_interface;
        int msg_enable;
index d4809ad..630536b 100644
@@ -942,7 +942,7 @@ out:
 static void ravb_adjust_link(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       struct phy_device *phydev = priv->phydev;
+       struct phy_device *phydev = ndev->phydev;
        bool new_state = false;
 
        if (phydev->link) {
@@ -1032,48 +1032,47 @@ static int ravb_phy_init(struct net_device *ndev)
 
        phy_attached_info(phydev);
 
-       priv->phydev = phydev;
-
        return 0;
 }
 
 /* PHY control start function */
 static int ravb_phy_start(struct net_device *ndev)
 {
-       struct ravb_private *priv = netdev_priv(ndev);
        int error;
 
        error = ravb_phy_init(ndev);
        if (error)
                return error;
 
-       phy_start(priv->phydev);
+       phy_start(ndev->phydev);
 
        return 0;
 }
 
-static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
+static int ravb_get_link_ksettings(struct net_device *ndev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        int error = -ENODEV;
        unsigned long flags;
 
-       if (priv->phydev) {
+       if (ndev->phydev) {
                spin_lock_irqsave(&priv->lock, flags);
-               error = phy_ethtool_gset(priv->phydev, ecmd);
+               error = phy_ethtool_ksettings_get(ndev->phydev, cmd);
                spin_unlock_irqrestore(&priv->lock, flags);
        }
 
        return error;
 }
 
-static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
+static int ravb_set_link_ksettings(struct net_device *ndev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        unsigned long flags;
        int error;
 
-       if (!priv->phydev)
+       if (!ndev->phydev)
                return -ENODEV;
 
        spin_lock_irqsave(&priv->lock, flags);
@@ -1081,11 +1080,11 @@ static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
        /* Disable TX and RX */
        ravb_rcv_snd_disable(ndev);
 
-       error = phy_ethtool_sset(priv->phydev, ecmd);
+       error = phy_ethtool_ksettings_set(ndev->phydev, cmd);
        if (error)
                goto error_exit;
 
-       if (ecmd->duplex == DUPLEX_FULL)
+       if (cmd->base.duplex == DUPLEX_FULL)
                priv->duplex = 1;
        else
                priv->duplex = 0;
@@ -1110,9 +1109,9 @@ static int ravb_nway_reset(struct net_device *ndev)
        int error = -ENODEV;
        unsigned long flags;
 
-       if (priv->phydev) {
+       if (ndev->phydev) {
                spin_lock_irqsave(&priv->lock, flags);
-               error = phy_start_aneg(priv->phydev);
+               error = phy_start_aneg(ndev->phydev);
                spin_unlock_irqrestore(&priv->lock, flags);
        }
 
@@ -1309,8 +1308,6 @@ static int ravb_get_ts_info(struct net_device *ndev,
 }
 
 static const struct ethtool_ops ravb_ethtool_ops = {
-       .get_settings           = ravb_get_settings,
-       .set_settings           = ravb_set_settings,
        .nway_reset             = ravb_nway_reset,
        .get_msglevel           = ravb_get_msglevel,
        .set_msglevel           = ravb_set_msglevel,
@@ -1321,6 +1318,8 @@ static const struct ethtool_ops ravb_ethtool_ops = {
        .get_ringparam          = ravb_get_ringparam,
        .set_ringparam          = ravb_set_ringparam,
        .get_ts_info            = ravb_get_ts_info,
+       .get_link_ksettings     = ravb_get_link_ksettings,
+       .set_link_ksettings     = ravb_set_link_ksettings,
 };
 
 static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
@@ -1661,10 +1660,9 @@ static int ravb_close(struct net_device *ndev)
        }
 
        /* PHY disconnect */
-       if (priv->phydev) {
-               phy_stop(priv->phydev);
-               phy_disconnect(priv->phydev);
-               priv->phydev = NULL;
+       if (ndev->phydev) {
+               phy_stop(ndev->phydev);
+               phy_disconnect(ndev->phydev);
        }
 
        if (priv->chip_id != RCAR_GEN2) {
@@ -1753,8 +1751,7 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req)
 /* ioctl to device function */
 static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 {
-       struct ravb_private *priv = netdev_priv(ndev);
-       struct phy_device *phydev = priv->phydev;
+       struct phy_device *phydev = ndev->phydev;
 
        if (!netif_running(ndev))
                return -EINVAL;
@@ -2103,8 +2100,7 @@ static int ravb_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int ravb_suspend(struct device *dev)
+static int __maybe_unused ravb_suspend(struct device *dev)
 {
        struct net_device *ndev = dev_get_drvdata(dev);
        int ret = 0;
@@ -2117,7 +2113,7 @@ static int ravb_suspend(struct device *dev)
        return ret;
 }
 
-static int ravb_resume(struct device *dev)
+static int __maybe_unused ravb_resume(struct device *dev)
 {
        struct net_device *ndev = dev_get_drvdata(dev);
        struct ravb_private *priv = netdev_priv(ndev);
@@ -2152,7 +2148,7 @@ static int ravb_resume(struct device *dev)
        return ret;
 }
 
-static int ravb_runtime_nop(struct device *dev)
+static int __maybe_unused ravb_runtime_nop(struct device *dev)
 {
        /* Runtime PM callback shared between ->runtime_suspend()
         * and ->runtime_resume(). Simply returns success.
@@ -2169,17 +2165,12 @@ static const struct dev_pm_ops ravb_dev_pm_ops = {
        SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
 };
 
-#define RAVB_PM_OPS (&ravb_dev_pm_ops)
-#else
-#define RAVB_PM_OPS NULL
-#endif
-
 static struct platform_driver ravb_driver = {
        .probe          = ravb_probe,
        .remove         = ravb_remove,
        .driver = {
                .name   = "ravb",
-               .pm     = RAVB_PM_OPS,
+               .pm     = &ravb_dev_pm_ops,
                .of_match_table = ravb_match_table,
        },
 };
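With every PM callback annotated __maybe_unused, the #ifdef CONFIG_PM scaffolding and the RAVB_PM_OPS indirection become unnecessary: the dev_pm_ops structure can be referenced unconditionally, and the compiler discards the unused callbacks when the SET_*_PM_OPS macros expand to nothing. A minimal sketch of the pattern, with hypothetical driver names:

	static int __maybe_unused foo_suspend(struct device *dev)
	{
		return 0;	/* quiesce the hardware here */
	}

	static int __maybe_unused foo_resume(struct device *dev)
	{
		return 0;	/* re-initialize the hardware here */
	}

	static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

	/* .pm = &foo_pm_ops is valid with or without CONFIG_PM */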
index 1f8240a..440ae27 100644
@@ -201,9 +201,14 @@ static const u16 sh_eth_offset_fast_rz[SH_ETH_MAX_REGISTER_OFFSET] = {
 
        [ARSTR]         = 0x0000,
        [TSU_CTRST]     = 0x0004,
+       [TSU_FWSLC]     = 0x0038,
        [TSU_VTAG0]     = 0x0058,
        [TSU_ADSBSY]    = 0x0060,
        [TSU_TEN]       = 0x0064,
+       [TSU_POST1]     = 0x0070,
+       [TSU_POST2]     = 0x0074,
+       [TSU_POST3]     = 0x0078,
+       [TSU_POST4]     = 0x007c,
        [TSU_ADRH0]     = 0x0100,
 
        [TXNLCR0]       = 0x0080,
@@ -2781,6 +2786,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp)
 {
        if (sh_eth_is_rz_fast_ether(mdp)) {
                sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
+               sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
+                                TSU_FWSLC);    /* Enable POST registers */
                return;
        }
 
index f0b09b0..1f0c086 100644
@@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker,
        skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
        if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
-               skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+               skb->offload_fwd_mark = 1;
 
        rocker_port->dev->stats.rx_packets++;
        rocker_port->dev->stats.rx_bytes += skb->len;
index 1ca7963..fcad907 100644
@@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port)
        struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
        int err;
 
-       switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false);
        rocker_port_set_learning(rocker_port,
                                 !!(ofdpa_port->brport_flags & BR_LEARNING));
 
@@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
                ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex);
 
        ofdpa_port->bridge_dev = bridge;
-       switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true);
 
        return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
 }
@@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
                ofdpa_port_internal_vlan_id_get(ofdpa_port,
                                                ofdpa_port->dev->ifindex);
 
-       switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev,
-                                   false);
        ofdpa_port->bridge_dev = NULL;
 
        err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
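The rocker hunks drop the per-port forwarding-mark bookkeeping: offload_fwd_mark is now a plain flag telling the stack that the hardware already forwarded this frame, so software should not forward it again out of ports of the same offloaded switch. Conceptually, with a hypothetical consumer-side helper:

	/* producer (driver rx path): flag frames the ASIC already forwarded */
	if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
		skb->offload_fwd_mark = 1;

	/* consumer (switchdev/bridge side, sketch only): */
	if (skb->offload_fwd_mark)
		skip_duplicate_forward(skb);	/* hypothetical */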
index b8c9f18..00279da 100644
@@ -646,7 +646,6 @@ static int efx_ef10_probe(struct efx_nic *efx)
        rc = efx_ef10_get_timer_config(efx);
        if (rc < 0)
                goto fail5;
-       efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
 
        rc = efx_mcdi_mon_probe(efx);
        if (rc && rc != -EPERM)
@@ -1534,9 +1533,10 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
                               (1ULL << GENERIC_STAT_rx_nodesc_trunc) | \
                               (1ULL << GENERIC_STAT_rx_noskb_drops))
 
-/* These statistics are only provided by the 10G MAC.  For a 10G/40G
- * switchable port we do not expose these because they might not
- * include all the packets they should.
+/* On 7000 series NICs, these statistics are only provided by the 10G MAC.
+ * For a 10G/40G switchable port we do not expose these because they might
+ * not include all the packets they should.
+ * On 8000 series NICs these statistics are always provided.
  */
 #define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_port_tx_control) | \
                                 (1ULL << EF10_STAT_port_tx_lt64) |     \
@@ -1582,10 +1582,15 @@ static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx)
              1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL))
                return 0;
 
-       if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+       if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) {
                raw_mask |= HUNT_40G_EXTRA_STAT_MASK;
-       else
+               /* 8000 series have everything even at 40G */
+               if (nic_data->datapath_caps2 &
+                   (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN))
+                       raw_mask |= HUNT_10G_ONLY_STAT_MASK;
+       } else {
                raw_mask |= HUNT_10G_ONLY_STAT_MASK;
+       }
 
        if (nic_data->datapath_caps &
            (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN))
@@ -1611,13 +1616,14 @@ static void efx_ef10_get_stat_mask(struct efx_nic *efx, unsigned long *mask)
        }
 
 #if BITS_PER_LONG == 64
+       BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 2);
        mask[0] = raw_mask[0];
        mask[1] = raw_mask[1];
 #else
+       BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 3);
        mask[0] = raw_mask[0] & 0xffffffff;
        mask[1] = raw_mask[0] >> 32;
        mask[2] = raw_mask[1] & 0xffffffff;
-       mask[3] = raw_mask[1] >> 32;
 #endif
 }
 
@@ -1710,7 +1716,6 @@ static int efx_ef10_try_update_nic_stats_pf(struct efx_nic *efx)
        efx_ef10_get_stat_mask(efx, mask);
 
        dma_stats = efx->stats_buffer.addr;
-       nic_data = efx->nic_data;
 
        generation_end = dma_stats[MC_CMD_MAC_GENERATION_END];
        if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
@@ -2044,14 +2049,18 @@ static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static void efx_ef10_irq_test_generate(struct efx_nic *efx)
+static int efx_ef10_irq_test_generate(struct efx_nic *efx)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN);
 
+       if (efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG41750, true,
+                                   NULL) == 0)
+               return -ENOTSUPP;
+
        BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0);
 
        MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level);
-       (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
+       return efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
                            inbuf, sizeof(inbuf), NULL, 0, NULL);
 }
 
index f3826ae..3cf3557 100644
@@ -2263,8 +2263,18 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
        rc = efx_check_disabled(efx);
        if (rc)
                return rc;
-       if (new_mtu > EFX_MAX_MTU)
+       if (new_mtu > EFX_MAX_MTU) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Requested MTU of %d too big (max: %d)\n",
+                         new_mtu, EFX_MAX_MTU);
                return -EINVAL;
+       }
+       if (new_mtu < EFX_MIN_MTU) {
+               netif_err(efx, drv, efx->net_dev,
+                         "Requested MTU of %d too small (min: %d)\n",
+                         new_mtu, EFX_MIN_MTU);
+               return -EINVAL;
+       }
 
        netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
index 1736f4b..f6883b2 100644
@@ -64,7 +64,7 @@
 #define LM87_ALARM_TEMP_INT            0x10
 #define LM87_ALARM_TEMP_EXT1           0x20
 
-#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
+#if IS_ENABLED(CONFIG_SENSORS_LM87)
 
 static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
 {
@@ -455,7 +455,7 @@ static int sfe4001_init(struct efx_nic *efx)
        struct falcon_board *board = falcon_board(efx);
        int rc;
 
-#if defined(CONFIG_SENSORS_LM90) || defined(CONFIG_SENSORS_LM90_MODULE)
+#if IS_ENABLED(CONFIG_SENSORS_LM90)
        board->hwmon_client =
                i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info);
 #else
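Several hunks in this series replace the two-macro configuration test with IS_ENABLED(), which covers built-in and modular builds in one expression. Sketch, with CONFIG_FOO as an illustrative symbol:

	#if defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE)	/* old form */
	#endif

	#if IS_ENABLED(CONFIG_FOO)	/* new form: true for =y and for =m */
	#endif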
index 4c83739..4762ec4 100644
@@ -1477,9 +1477,10 @@ void efx_farch_irq_disable_master(struct efx_nic *efx)
  * Interrupt must already have been enabled, otherwise nasty things
  * may happen.
  */
-void efx_farch_irq_test_generate(struct efx_nic *efx)
+int efx_farch_irq_test_generate(struct efx_nic *efx)
 {
        efx_farch_interrupts(efx, true, true);
+       return 0;
 }
 
 /* Process a fatal interrupt
index 13b7f52..99d8c82 100644
@@ -76,6 +76,9 @@
 /* Maximum possible MTU the driver supports */
 #define EFX_MAX_MTU (9 * 1024)
 
+/* Minimum MTU, from RFC 791 (IP) */
+#define EFX_MIN_MTU 68
+
 /* Size of an RX scatter buffer.  Small enough to pack 2 into a 4K page,
  * and should be a multiple of the cache line size.
  */
@@ -1275,7 +1278,7 @@ struct efx_nic_type {
        int (*mcdi_poll_reboot)(struct efx_nic *efx);
        void (*mcdi_reboot_detected)(struct efx_nic *efx);
        void (*irq_enable_master)(struct efx_nic *efx);
-       void (*irq_test_generate)(struct efx_nic *efx);
+       int (*irq_test_generate)(struct efx_nic *efx);
        void (*irq_disable_non_ev)(struct efx_nic *efx);
        irqreturn_t (*irq_handle_msi)(int irq, void *dev_id);
        irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id);
index 89b83e5..aa1945a 100644
@@ -66,11 +66,11 @@ void efx_nic_event_test_start(struct efx_channel *channel)
        channel->efx->type->ev_test_generate(channel);
 }
 
-void efx_nic_irq_test_start(struct efx_nic *efx)
+int efx_nic_irq_test_start(struct efx_nic *efx)
 {
        efx->last_irq_cpu = -1;
        smp_wmb();
-       efx->type->irq_test_generate(efx);
+       return efx->type->irq_test_generate(efx);
 }
 
 /* Hook interrupt handler(s)
index d8b1694..73bee7e 100644
@@ -746,12 +746,12 @@ static inline void efx_update_diff_stat(u64 *stat, u64 diff)
 
 /* Interrupts */
 int efx_nic_init_interrupt(struct efx_nic *efx);
-void efx_nic_irq_test_start(struct efx_nic *efx);
+int efx_nic_irq_test_start(struct efx_nic *efx);
 void efx_nic_fini_interrupt(struct efx_nic *efx);
 
 /* Falcon/Siena interrupts */
 void efx_farch_irq_enable_master(struct efx_nic *efx);
-void efx_farch_irq_test_generate(struct efx_nic *efx);
+int efx_farch_irq_test_generate(struct efx_nic *efx);
 void efx_farch_irq_disable_master(struct efx_nic *efx);
 irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id);
 irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id);
index 9d78830..cd38b44 100644
@@ -135,11 +135,19 @@ static int efx_test_interrupts(struct efx_nic *efx,
 {
        unsigned long timeout, wait;
        int cpu;
+       int rc;
 
        netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
        tests->interrupt = -1;
 
-       efx_nic_irq_test_start(efx);
+       rc = efx_nic_irq_test_start(efx);
+       if (rc == -ENOTSUPP) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "direct interrupt testing not supported\n");
+               tests->interrupt = 0;
+               return 0;
+       }
+
        timeout = jiffies + IRQ_TIMEOUT;
        wait = 1;
 
index 009dbe8..32a4272 100644
@@ -28,7 +28,7 @@ struct efx_loopback_self_tests {
 
 /* Efx self test results
  * For fields which are not counters, 1 indicates success and -1
- * indicates failure.
+ * indicates failure; 0 indicates the test could not be run.
  */
 struct efx_self_tests {
        /* online tests */
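Combined with the efx_test_interrupts() change above, the selftest result becomes tri-state: 1 for pass, -1 for fail, and 0 when the test could not be run (here, when the firmware workaround makes direct interrupt testing unavailable). The mapping amounts to the following sketch, where got_interrupt is illustrative:

	rc = efx_nic_irq_test_start(efx);
	if (rc == -ENOTSUPP)
		tests->interrupt = 0;	/* could not be run */
	else if (rc == 0 && got_interrupt)
		tests->interrupt = 1;	/* pass */
	else
		tests->interrupt = -1;	/* fail */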
index 95001ee..6f85276 100644
@@ -1426,7 +1426,7 @@ static void sis900_set_mode(struct sis900_private *sp, int speed, int duplex)
                rx_flags |= RxATX;
        }
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
        /* Can accept Jumbo packet */
        rx_flags |= RxAJAB;
 #endif
@@ -1750,7 +1750,7 @@ static int sis900_rx(struct net_device *net_dev)
                data_size = rx_status & DSIZE;
                rx_size = data_size - CRC_SIZE;
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
                /* ``TOOLONG'' flag means jumbo packet received. */
                if ((rx_status & TOOLONG) && data_size <= MAX_FRAME_SIZE)
                        rx_status &= (~ ((unsigned int)TOOLONG));
index 7d430d3..f0da3dc 100644
@@ -310,7 +310,7 @@ enum sis630_revision_id {
 #define CRC_SIZE                4
 #define MAC_HEADER_SIZE         14
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define MAX_FRAME_SIZE  (1518 + 4)
 #else
 #define MAX_FRAME_SIZE  1518
index 726b80f..503a3b6 100644
@@ -2275,6 +2275,13 @@ static int smc_drv_probe(struct platform_device *pdev)
        if (pd) {
                memcpy(&lp->cfg, pd, sizeof(lp->cfg));
                lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
+
+               if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
+                       dev_err(&pdev->dev,
+                               "at least one of 8-bit or 16-bit access support is required.\n");
+                       ret = -ENXIO;
+                       goto out_free_netdev;
+               }
        }
 
 #if IS_BUILTIN(CONFIG_OF)
index 1a55c79..ea84654 100644
 #include <linux/dmaengine.h>
 #include <linux/smc91x.h>
 
+/*
+ * Any 16-bit access is performed with two 8-bit accesses if the hardware
+ * can't do it directly. Most registers are 16-bit so those are mandatory.
+ */
+#define SMC_outw_b(x, a, r)                                            \
+       do {                                                            \
+               unsigned int __val16 = (x);                             \
+               unsigned int __reg = (r);                               \
+               SMC_outb(__val16, a, __reg);                            \
+               SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \
+       } while (0)
+
+#define SMC_inw_b(a, r)                                                        \
+       ({                                                              \
+               unsigned int __val16;                                   \
+               unsigned int __reg = r;                                 \
+               __val16  = SMC_inb(a, __reg);                           \
+               __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
+               __val16;                                                \
+       })
+
 /*
  * Define your architecture specific bus configuration parameters here.
  */
 #define SMC_IO_SHIFT           (lp->io_shift)
 
 #define SMC_inb(a, r)          readb((a) + (r))
-#define SMC_inw(a, r)          readw((a) + (r))
+#define SMC_inw(a, r)                                                  \
+       ({                                                              \
+               unsigned int __smc_r = r;                               \
+               SMC_16BIT(lp) ? readw((a) + __smc_r) :                  \
+               SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) :                  \
+               ({ BUG(); 0; });                                        \
+       })
+
 #define SMC_inl(a, r)          readl((a) + (r))
 #define SMC_outb(v, a, r)      writeb(v, (a) + (r))
+#define SMC_outw(v, a, r)                                              \
+       do {                                                            \
+               unsigned int __v = v, __smc_r = r;                      \
+               if (SMC_16BIT(lp))                                      \
+                       __SMC_outw(__v, a, __smc_r);                    \
+               else if (SMC_8BIT(lp))                                  \
+                       SMC_outw_b(__v, a, __smc_r);                    \
+               else                                                    \
+                       BUG();                                          \
+       } while (0)
+
 #define SMC_outl(v, a, r)      writel(v, (a) + (r))
+#define SMC_insb(a, r, p, l)   readsb((a) + (r), p, l)
+#define SMC_outsb(a, r, p, l)  writesb((a) + (r), p, l)
 #define SMC_insw(a, r, p, l)   readsw((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)  writesw((a) + (r), p, l)
 #define SMC_insl(a, r, p, l)   readsl((a) + (r), p, l)
 #define SMC_IRQ_FLAGS          (-1)    /* from resource */
 
 /* We actually can't write halfwords properly if not word aligned */
-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 {
        if ((machine_is_mainstone() || machine_is_stargate2() ||
             machine_is_pxa_idp()) && reg & 2) {
@@ -416,24 +457,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
 
 #if ! SMC_CAN_USE_16BIT
 
-/*
- * Any 16-bit access is performed with two 8-bit accesses if the hardware
- * can't do it directly. Most registers are 16-bit so those are mandatory.
- */
-#define SMC_outw(x, ioaddr, reg)                                       \
-       do {                                                            \
-               unsigned int __val16 = (x);                             \
-               SMC_outb( __val16, ioaddr, reg );                       \
-               SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
-       } while (0)
-#define SMC_inw(ioaddr, reg)                                           \
-       ({                                                              \
-               unsigned int __val16;                                   \
-               __val16 =  SMC_inb( ioaddr, reg );                      \
-               __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
-               __val16;                                                \
-       })
-
+#define SMC_outw(x, ioaddr, reg)       SMC_outw_b(x, ioaddr, reg)
+#define SMC_inw(ioaddr, reg)           SMC_inw_b(ioaddr, reg)
 #define SMC_insw(a, r, p, l)           BUG()
 #define SMC_outsw(a, r, p, l)          BUG()
 
@@ -445,7 +470,9 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
 #endif
 
 #if ! SMC_CAN_USE_8BIT
+#undef SMC_inb
 #define SMC_inb(ioaddr, reg)           ({ BUG(); 0; })
+#undef SMC_outb
 #define SMC_outb(x, ioaddr, reg)       BUG()
 #define SMC_insb(a, r, p, l)           BUG()
 #define SMC_outsb(a, r, p, l)          BUG()
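The SMC_inw_b()/SMC_outw_b() helpers above compose one 16-bit register access from two little-endian byte accesses, with the second byte one register step (1 << SMC_IO_SHIFT) above the first. Expanded by hand, a 16-bit read amounts to this sketch (names illustrative):

	static inline u16 smc_read16_as_bytes(void __iomem *base, int reg,
					      int io_shift)
	{
		u16 lo = readb(base + reg);
		u16 hi = readb(base + reg + (1 << io_shift));

		return lo | (hi << 8);	/* low byte lives at the lower address */
	}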
index ca31345..e9b8579 100644
@@ -62,6 +62,7 @@
 #include <linux/acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
+#include <linux/gpio/consumer.h>
 
 #include "smsc911x.h"
 
@@ -147,6 +148,9 @@ struct smsc911x_data {
        /* regulators */
        struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES];
 
+       /* Reset GPIO */
+       struct gpio_desc *reset_gpiod;
+
        /* clock */
        struct clk *clk;
 };
@@ -438,6 +442,11 @@ static int smsc911x_request_resources(struct platform_device *pdev)
                netdev_err(ndev, "couldn't get regulators %d\n",
                                ret);
 
+       /* Request optional RESET GPIO */
+       pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev,
+                                                    "reset",
+                                                    GPIOD_OUT_LOW);
+
        /* Request clock */
        pdata->clk = clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk))
@@ -1099,15 +1108,8 @@ static int smsc911x_mii_init(struct platform_device *pdev,
                goto err_out_free_bus_2;
        }
 
-       if (smsc911x_mii_probe(dev) < 0) {
-               SMSC_WARN(pdata, probe, "Error registering mii bus");
-               goto err_out_unregister_bus_3;
-       }
-
        return 0;
 
-err_out_unregister_bus_3:
-       mdiobus_unregister(pdata->mii_bus);
 err_out_free_bus_2:
        mdiobus_free(pdata->mii_bus);
 err_out_1:
@@ -1514,23 +1516,90 @@ static void smsc911x_disable_irq_chip(struct net_device *dev)
        smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF);
 }
 
+static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
+{
+       struct net_device *dev = dev_id;
+       struct smsc911x_data *pdata = netdev_priv(dev);
+       u32 intsts = smsc911x_reg_read(pdata, INT_STS);
+       u32 inten = smsc911x_reg_read(pdata, INT_EN);
+       int serviced = IRQ_NONE;
+       u32 temp;
+
+       if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
+               temp = smsc911x_reg_read(pdata, INT_EN);
+               temp &= (~INT_EN_SW_INT_EN_);
+               smsc911x_reg_write(pdata, INT_EN, temp);
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
+               pdata->software_irq_signal = 1;
+               smp_wmb();
+               serviced = IRQ_HANDLED;
+       }
+
+       if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
+               /* Called when there is a multicast update scheduled and
+                * it is now safe to complete the update */
+               SMSC_TRACE(pdata, intr, "RX Stop interrupt");
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
+               if (pdata->multicast_update_pending)
+                       smsc911x_rx_multicast_update_workaround(pdata);
+               serviced = IRQ_HANDLED;
+       }
+
+       if (intsts & inten & INT_STS_TDFA_) {
+               temp = smsc911x_reg_read(pdata, FIFO_INT);
+               temp |= FIFO_INT_TX_AVAIL_LEVEL_;
+               smsc911x_reg_write(pdata, FIFO_INT, temp);
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
+               netif_wake_queue(dev);
+               serviced = IRQ_HANDLED;
+       }
+
+       if (unlikely(intsts & inten & INT_STS_RXE_)) {
+               SMSC_TRACE(pdata, intr, "RX Error interrupt");
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
+               serviced = IRQ_HANDLED;
+       }
+
+       if (likely(intsts & inten & INT_STS_RSFL_)) {
+               if (likely(napi_schedule_prep(&pdata->napi))) {
+                       /* Disable Rx interrupts */
+                       temp = smsc911x_reg_read(pdata, INT_EN);
+                       temp &= (~INT_EN_RSFL_EN_);
+                       smsc911x_reg_write(pdata, INT_EN, temp);
+                       /* Schedule a NAPI poll */
+                       __napi_schedule(&pdata->napi);
+               } else {
+                       SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
+               }
+               serviced = IRQ_HANDLED;
+       }
+
+       return serviced;
+}
+
 static int smsc911x_open(struct net_device *dev)
 {
        struct smsc911x_data *pdata = netdev_priv(dev);
        unsigned int timeout;
        unsigned int temp;
        unsigned int intcfg;
+       int retval;
+       int irq_flags;
 
-       /* if the phy is not yet registered, retry later*/
+       /* find and start the given phy */
        if (!dev->phydev) {
-               SMSC_WARN(pdata, hw, "phy_dev is NULL");
-               return -EAGAIN;
+               retval = smsc911x_mii_probe(dev);
+               if (retval < 0) {
+                       SMSC_WARN(pdata, probe, "Error starting phy");
+                       goto out;
+               }
        }
 
        /* Reset the LAN911x */
-       if (smsc911x_soft_reset(pdata)) {
+       retval = smsc911x_soft_reset(pdata);
+       if (retval) {
                SMSC_WARN(pdata, hw, "soft reset failed");
-               return -EIO;
+               goto mii_free_out;
        }
 
        smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
@@ -1586,6 +1655,15 @@ static int smsc911x_open(struct net_device *dev)
        pdata->software_irq_signal = 0;
        smp_wmb();
 
+       irq_flags = irq_get_trigger_type(dev->irq);
+       retval = request_irq(dev->irq, smsc911x_irqhandler,
+                            irq_flags | IRQF_SHARED, dev->name, dev);
+       if (retval) {
+               SMSC_WARN(pdata, probe,
+                         "Unable to claim requested irq: %d", dev->irq);
+               goto mii_free_out;
+       }
+
        temp = smsc911x_reg_read(pdata, INT_EN);
        temp |= INT_EN_SW_INT_EN_;
        smsc911x_reg_write(pdata, INT_EN, temp);
@@ -1600,7 +1678,8 @@ static int smsc911x_open(struct net_device *dev)
        if (!pdata->software_irq_signal) {
                netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n",
                            dev->irq);
-               return -ENODEV;
+               retval = -ENODEV;
+               goto irq_stop_out;
        }
        SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d",
                   dev->irq);
@@ -1646,6 +1725,14 @@ static int smsc911x_open(struct net_device *dev)
 
        netif_start_queue(dev);
        return 0;
+
+irq_stop_out:
+       free_irq(dev->irq, dev);
+mii_free_out:
+       phy_disconnect(dev->phydev);
+       dev->phydev = NULL;
+out:
+       return retval;
 }
 
 /* Entry point for stopping the interface */
@@ -1667,9 +1754,15 @@ static int smsc911x_stop(struct net_device *dev)
        dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP);
        smsc911x_tx_update_txcounters(dev);
 
+       free_irq(dev->irq, dev);
+
        /* Bring the PHY down */
-       if (dev->phydev)
+       if (dev->phydev) {
                phy_stop(dev->phydev);
+               phy_disconnect(dev->phydev);
+               dev->phydev = NULL;
+       }
+       netif_carrier_off(dev);
 
        SMSC_TRACE(pdata, ifdown, "Interface stopped");
        return 0;
@@ -1811,67 +1904,6 @@ static void smsc911x_set_multicast_list(struct net_device *dev)
        spin_unlock_irqrestore(&pdata->mac_lock, flags);
 }
 
-static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
-{
-       struct net_device *dev = dev_id;
-       struct smsc911x_data *pdata = netdev_priv(dev);
-       u32 intsts = smsc911x_reg_read(pdata, INT_STS);
-       u32 inten = smsc911x_reg_read(pdata, INT_EN);
-       int serviced = IRQ_NONE;
-       u32 temp;
-
-       if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
-               temp = smsc911x_reg_read(pdata, INT_EN);
-               temp &= (~INT_EN_SW_INT_EN_);
-               smsc911x_reg_write(pdata, INT_EN, temp);
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
-               pdata->software_irq_signal = 1;
-               smp_wmb();
-               serviced = IRQ_HANDLED;
-       }
-
-       if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
-               /* Called when there is a multicast update scheduled and
-                * it is now safe to complete the update */
-               SMSC_TRACE(pdata, intr, "RX Stop interrupt");
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
-               if (pdata->multicast_update_pending)
-                       smsc911x_rx_multicast_update_workaround(pdata);
-               serviced = IRQ_HANDLED;
-       }
-
-       if (intsts & inten & INT_STS_TDFA_) {
-               temp = smsc911x_reg_read(pdata, FIFO_INT);
-               temp |= FIFO_INT_TX_AVAIL_LEVEL_;
-               smsc911x_reg_write(pdata, FIFO_INT, temp);
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
-               netif_wake_queue(dev);
-               serviced = IRQ_HANDLED;
-       }
-
-       if (unlikely(intsts & inten & INT_STS_RXE_)) {
-               SMSC_TRACE(pdata, intr, "RX Error interrupt");
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
-               serviced = IRQ_HANDLED;
-       }
-
-       if (likely(intsts & inten & INT_STS_RSFL_)) {
-               if (likely(napi_schedule_prep(&pdata->napi))) {
-                       /* Disable Rx interrupts */
-                       temp = smsc911x_reg_read(pdata, INT_EN);
-                       temp &= (~INT_EN_RSFL_EN_);
-                       smsc911x_reg_write(pdata, INT_EN, temp);
-                       /* Schedule a NAPI poll */
-                       __napi_schedule(&pdata->napi);
-               } else {
-                       SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
-               }
-               serviced = IRQ_HANDLED;
-       }
-
-       return serviced;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void smsc911x_poll_controller(struct net_device *dev)
 {
@@ -2291,16 +2323,14 @@ static int smsc911x_drv_remove(struct platform_device *pdev)
        pdata = netdev_priv(dev);
        BUG_ON(!pdata);
        BUG_ON(!pdata->ioaddr);
-       BUG_ON(!dev->phydev);
+       WARN_ON(dev->phydev);
 
        SMSC_TRACE(pdata, ifdown, "Stopping driver");
 
-       phy_disconnect(dev->phydev);
        mdiobus_unregister(pdata->mii_bus);
        mdiobus_free(pdata->mii_bus);
 
        unregister_netdev(dev);
-       free_irq(dev->irq, dev);
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
                                           "smsc911x-memory");
        if (!res)
@@ -2385,8 +2415,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        struct smsc911x_data *pdata;
        struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev);
        struct resource *res;
-       unsigned int intcfg = 0;
-       int res_size, irq, irq_flags;
+       int res_size, irq;
        int retval;
 
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -2425,7 +2454,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 
        pdata = netdev_priv(dev);
        dev->irq = irq;
-       irq_flags = irq_get_trigger_type(irq);
        pdata->ioaddr = ioremap_nocache(res->start, res_size);
 
        pdata->dev = dev;
@@ -2472,43 +2500,23 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
        if (retval < 0)
                goto out_disable_resources;
 
-       /* configure irq polarity and type before connecting isr */
-       if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
-               intcfg |= INT_CFG_IRQ_POL_;
-
-       if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
-               intcfg |= INT_CFG_IRQ_TYPE_;
-
-       smsc911x_reg_write(pdata, INT_CFG, intcfg);
-
-       /* Ensure interrupts are globally disabled before connecting ISR */
-       smsc911x_disable_irq_chip(dev);
+       netif_carrier_off(dev);
 
-       retval = request_irq(dev->irq, smsc911x_irqhandler,
-                            irq_flags | IRQF_SHARED, dev->name, dev);
+       retval = smsc911x_mii_init(pdev, dev);
        if (retval) {
-               SMSC_WARN(pdata, probe,
-                         "Unable to claim requested irq: %d", dev->irq);
+               SMSC_WARN(pdata, probe, "Error %i initialising mii", retval);
                goto out_disable_resources;
        }
 
-       netif_carrier_off(dev);
-
        retval = register_netdev(dev);
        if (retval) {
                SMSC_WARN(pdata, probe, "Error %i registering device", retval);
-               goto out_free_irq;
+               goto out_disable_resources;
        } else {
                SMSC_TRACE(pdata, probe,
                           "Network interface: \"%s\"", dev->name);
        }
 
-       retval = smsc911x_mii_init(pdev, dev);
-       if (retval) {
-               SMSC_WARN(pdata, probe, "Error %i initialising mii", retval);
-               goto out_unregister_netdev_5;
-       }
-
        spin_lock_irq(&pdata->mac_lock);
 
        /* Check if mac address has been specified when bringing interface up */
@@ -2544,10 +2552,6 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 
        return 0;
 
-out_unregister_netdev_5:
-       unregister_netdev(dev);
-out_free_irq:
-       free_irq(dev->irq, dev);
 out_disable_resources:
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
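The net effect of the smsc911x rework is that the PHY connection and the IRQ line are now acquired in ndo_open() and released in ndo_stop(), so probe no longer pins resources an unopened interface cannot use. The resulting pairing, in outline with hypothetical names:

	static int foo_open(struct net_device *dev)
	{
		/* per-interface resources are taken only on ifup */
		return request_irq(dev->irq, foo_isr, IRQF_SHARED,
				   dev->name, dev);
	}

	static int foo_stop(struct net_device *dev)
	{
		free_irq(dev->irq, dev);	/* released again on ifdown */
		return 0;
	}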
index 8f06a66..c732b8c 100644
@@ -104,6 +104,18 @@ config DWMAC_STI
          device driver. This driver is used for the GMAC Ethernet
          controller on STi series SoCs.
 
+config DWMAC_STM32
+       tristate "STM32 DWMAC support"
+       default ARCH_STM32
+       depends on OF && HAS_IOMEM
+       select MFD_SYSCON
+       ---help---
+         Support for the Ethernet controller on STM32 SoCs.
+
+         This selects the STM32 SoC glue layer support for the stmmac
+         device driver. This driver is used for the GMAC Ethernet
+         controller on STM32 series SoCs.
+
 config DWMAC_SUNXI
        tristate "Allwinner GMAC support"
        default ARCH_SUNXI
index 44b630c..f0c9396 100644
@@ -13,6 +13,7 @@ obj-$(CONFIG_DWMAC_MESON)     += dwmac-meson.o
 obj-$(CONFIG_DWMAC_ROCKCHIP)   += dwmac-rk.o
 obj-$(CONFIG_DWMAC_SOCFPGA)    += dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)                += dwmac-sti.o
+obj-$(CONFIG_DWMAC_STM32)      += dwmac-stm32.o
 obj-$(CONFIG_DWMAC_SUNXI)      += dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_GENERIC)    += dwmac-generic.o
 stmmac-platform-objs:= stmmac_platform.o
index 2533b91..d3292c4 100644
@@ -30,7 +30,7 @@
 #include <linux/stmmac.h>
 #include <linux/phy.h>
 #include <linux/module.h>
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define STMMAC_VLAN_TAG_USED
 #include <linux/if_vlan.h>
 #endif
index 9210591..3740a44 100644
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
+#include <linux/pm_runtime.h>
 
 #include "stmmac_platform.h"
 
@@ -301,6 +302,118 @@ static const struct rk_gmac_ops rk3288_ops = {
        .set_rmii_speed = rk3288_set_rmii_speed,
 };
 
+#define RK3366_GRF_SOC_CON6    0x0418
+#define RK3366_GRF_SOC_CON7    0x041c
+
+/* RK3366_GRF_SOC_CON6 */
+#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \
+                                        GRF_CLR_BIT(11))
+#define RK3366_GMAC_PHY_INTF_SEL_RMII  (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+                                        GRF_BIT(11))
+#define RK3366_GMAC_FLOW_CTRL          GRF_BIT(8)
+#define RK3366_GMAC_FLOW_CTRL_CLR      GRF_CLR_BIT(8)
+#define RK3366_GMAC_SPEED_10M          GRF_CLR_BIT(7)
+#define RK3366_GMAC_SPEED_100M         GRF_BIT(7)
+#define RK3366_GMAC_RMII_CLK_25M       GRF_BIT(3)
+#define RK3366_GMAC_RMII_CLK_2_5M      GRF_CLR_BIT(3)
+#define RK3366_GMAC_CLK_125M           (GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3366_GMAC_CLK_25M            (GRF_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_CLK_2_5M           (GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_RMII_MODE          GRF_BIT(6)
+#define RK3366_GMAC_RMII_MODE_CLR      GRF_CLR_BIT(6)
+
+/* RK3366_GRF_SOC_CON7 */
+#define RK3366_GMAC_TXCLK_DLY_ENABLE   GRF_BIT(7)
+#define RK3366_GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(7)
+#define RK3366_GMAC_RXCLK_DLY_ENABLE   GRF_BIT(15)
+#define RK3366_GMAC_RXCLK_DLY_DISABLE  GRF_CLR_BIT(15)
+#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv,
+                               int tx_delay, int rx_delay)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                    RK3366_GMAC_PHY_INTF_SEL_RGMII |
+                    RK3366_GMAC_RMII_MODE_CLR);
+       regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7,
+                    RK3366_GMAC_RXCLK_DLY_ENABLE |
+                    RK3366_GMAC_TXCLK_DLY_ENABLE |
+                    RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) |
+                    RK3366_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                    RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE);
+}
+
+static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       if (speed == 10)
+               regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                            RK3366_GMAC_CLK_2_5M);
+       else if (speed == 100)
+               regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                            RK3366_GMAC_CLK_25M);
+       else if (speed == 1000)
+               regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                            RK3366_GMAC_CLK_125M);
+       else
+               dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       if (speed == 10) {
+               regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                            RK3366_GMAC_RMII_CLK_2_5M |
+                            RK3366_GMAC_SPEED_10M);
+       } else if (speed == 100) {
+               regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+                            RK3366_GMAC_RMII_CLK_25M |
+                            RK3366_GMAC_SPEED_100M);
+       } else {
+               dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+       }
+}
+
+static const struct rk_gmac_ops rk3366_ops = {
+       .set_to_rgmii = rk3366_set_to_rgmii,
+       .set_to_rmii = rk3366_set_to_rmii,
+       .set_rgmii_speed = rk3366_set_rgmii_speed,
+       .set_rmii_speed = rk3366_set_rmii_speed,
+};
+
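
The RK3366 block above leans on the GRF write-mask convention: the upper 16 bits of every General Register File write enable the corresponding lower 16 data bits, so individual fields can be changed without a read-modify-write. The helpers live earlier in dwmac-rk.c, outside this hunk; a hedged reconstruction:

    /* Upper halfword = write-enable mask, lower halfword = data. */
    #define HIWORD_UPDATE(val, mask, shift) \
            (((val) << (shift)) | ((mask) << ((shift) + 16)))

    #define GRF_BIT(nr)     (BIT(nr) | BIT((nr) + 16))      /* set bit nr   */
    #define GRF_CLR_BIT(nr) (BIT((nr) + 16))                /* clear bit nr */

So RK3366_GMAC_SPEED_100M (GRF_BIT(7)) writes data bit 7 together with mask bit 23 in one 32-bit store, leaving every other field in the register untouched.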
 #define RK3368_GRF_SOC_CON15   0x043c
 #define RK3368_GRF_SOC_CON16   0x0440
 
@@ -413,6 +526,118 @@ static const struct rk_gmac_ops rk3368_ops = {
        .set_rmii_speed = rk3368_set_rmii_speed,
 };
 
+#define RK3399_GRF_SOC_CON5    0xc214
+#define RK3399_GRF_SOC_CON6    0xc218
+
+/* RK3399_GRF_SOC_CON5 */
+#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \
+                                        GRF_CLR_BIT(11))
+#define RK3399_GMAC_PHY_INTF_SEL_RMII  (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+                                        GRF_BIT(11))
+#define RK3399_GMAC_FLOW_CTRL          GRF_BIT(8)
+#define RK3399_GMAC_FLOW_CTRL_CLR      GRF_CLR_BIT(8)
+#define RK3399_GMAC_SPEED_10M          GRF_CLR_BIT(7)
+#define RK3399_GMAC_SPEED_100M         GRF_BIT(7)
+#define RK3399_GMAC_RMII_CLK_25M       GRF_BIT(3)
+#define RK3399_GMAC_RMII_CLK_2_5M      GRF_CLR_BIT(3)
+#define RK3399_GMAC_CLK_125M           (GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3399_GMAC_CLK_25M            (GRF_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_CLK_2_5M           (GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_RMII_MODE          GRF_BIT(6)
+#define RK3399_GMAC_RMII_MODE_CLR      GRF_CLR_BIT(6)
+
+/* RK3399_GRF_SOC_CON6 */
+#define RK3399_GMAC_TXCLK_DLY_ENABLE   GRF_BIT(7)
+#define RK3399_GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(7)
+#define RK3399_GMAC_RXCLK_DLY_ENABLE   GRF_BIT(15)
+#define RK3399_GMAC_RXCLK_DLY_DISABLE  GRF_CLR_BIT(15)
+#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv,
+                               int tx_delay, int rx_delay)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                    RK3399_GMAC_PHY_INTF_SEL_RGMII |
+                    RK3399_GMAC_RMII_MODE_CLR);
+       regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6,
+                    RK3399_GMAC_RXCLK_DLY_ENABLE |
+                    RK3399_GMAC_TXCLK_DLY_ENABLE |
+                    RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) |
+                    RK3399_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                    RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE);
+}
+
+static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       if (speed == 10)
+               regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                            RK3399_GMAC_CLK_2_5M);
+       else if (speed == 100)
+               regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                            RK3399_GMAC_CLK_25M);
+       else if (speed == 1000)
+               regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                            RK3399_GMAC_CLK_125M);
+       else
+               dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+       struct device *dev = &bsp_priv->pdev->dev;
+
+       if (IS_ERR(bsp_priv->grf)) {
+               dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+               return;
+       }
+
+       if (speed == 10) {
+               regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                            RK3399_GMAC_RMII_CLK_2_5M |
+                            RK3399_GMAC_SPEED_10M);
+       } else if (speed == 100) {
+               regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+                            RK3399_GMAC_RMII_CLK_25M |
+                            RK3399_GMAC_SPEED_100M);
+       } else {
+               dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+       }
+}
+
+static const struct rk_gmac_ops rk3399_ops = {
+       .set_to_rgmii = rk3399_set_to_rgmii,
+       .set_to_rmii = rk3399_set_to_rmii,
+       .set_rgmii_speed = rk3399_set_rgmii_speed,
+       .set_rmii_speed = rk3399_set_rmii_speed,
+};
+
 static int gmac_clk_init(struct rk_priv_data *bsp_priv)
 {
        struct device *dev = &bsp_priv->pdev->dev;
@@ -629,6 +854,16 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
                                                        "rockchip,grf");
        bsp_priv->pdev = pdev;
 
+       gmac_clk_init(bsp_priv);
+
+       return bsp_priv;
+}
+
+static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
+{
+       int ret;
+       struct device *dev = &bsp_priv->pdev->dev;
+
        /* RMII or RGMII */
        if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
                dev_info(dev, "init for RGMII\n");
@@ -641,15 +876,6 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
                dev_err(dev, "NO interface defined!\n");
        }
 
-       gmac_clk_init(bsp_priv);
-
-       return bsp_priv;
-}
-
-static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
-{
-       int ret;
-
        ret = phy_power_on(bsp_priv, true);
        if (ret)
                return ret;
@@ -658,11 +884,19 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
        if (ret)
                return ret;
 
+       pm_runtime_enable(dev);
+       pm_runtime_get_sync(dev);
+
        return 0;
 }
 
 static void rk_gmac_powerdown(struct rk_priv_data *gmac)
 {
+       struct device *dev = &gmac->pdev->dev;
+
+       pm_runtime_put_sync(dev);
+       pm_runtime_disable(dev);
+
        phy_power_on(gmac, false);
        gmac_clk_enable(gmac, false);
 }
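
The two hunks above bracket runtime PM symmetrically: enable/get_sync on powerup, put_sync/disable on powerdown. Keeping the calls balanced matters: an unmatched enable trips the "Unbalanced pm_runtime_enable!" warning on the next bind, and an unmatched get pins the device on. A condensed sketch, including the get_sync error handling a caller would presumably want:

    #include <linux/pm_runtime.h>

    static int example_powerup(struct device *dev)
    {
            int ret;

            pm_runtime_enable(dev);           /* allow runtime PM transitions */
            ret = pm_runtime_get_sync(dev);   /* usage count +1, resume now   */
            if (ret < 0) {
                    pm_runtime_put_noidle(dev);   /* undo the count bump */
                    pm_runtime_disable(dev);
                    return ret;
            }
            return 0;
    }

    static void example_powerdown(struct device *dev)
    {
            pm_runtime_put_sync(dev);         /* drop the ref, may suspend     */
            pm_runtime_disable(dev);          /* mirror of pm_runtime_enable() */
    }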
@@ -760,7 +994,9 @@ static int rk_gmac_probe(struct platform_device *pdev)
 static const struct of_device_id rk_gmac_dwmac_match[] = {
        { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
        { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
+       { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
        { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },
+       { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops },
        { }
 };
 MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
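
Both new compatibles map straight to their rk_gmac_ops through the .data pointer. The probe path is outside this hunk, but it presumably recovers the ops with the usual match-data idiom:

    #include <linux/of_device.h>

    static int example_probe(struct platform_device *pdev)
    {
            const struct rk_gmac_ops *ops;

            /* Returns the .data of the matched of_device_id entry, so a
             * "rockchip,rk3399-gmac" node yields &rk3399_ops.
             */
            ops = of_device_get_match_data(&pdev->dev);
            if (!ops)
                    return -EINVAL;

            return 0;
    }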
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
new file mode 100644 (file)
index 0000000..e5a926b
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU
+ *
+ * Copyright (C) Alexandre Torgue 2015
+ * Author:  Alexandre Torgue <alexandre.torgue@gmail.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+#define MII_PHY_SEL_MASK       BIT(23)
+
+struct stm32_dwmac {
+       struct clk *clk_tx;
+       struct clk *clk_rx;
+       u32 mode_reg;           /* MAC glue-logic mode register */
+       struct regmap *regmap;
+       u32 speed;
+};
+
+static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
+{
+       struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
+       u32 reg = dwmac->mode_reg;
+       u32 val;
+       int ret;
+
+       /* regmap_update_bits() masks val, so pass the mask bit, not 1 */
+       val = (plat_dat->interface == PHY_INTERFACE_MODE_MII) ?
+             0 : MII_PHY_SEL_MASK;
+       ret = regmap_update_bits(dwmac->regmap, reg, MII_PHY_SEL_MASK, val);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(dwmac->clk_tx);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(dwmac->clk_rx);
+       if (ret)
+               clk_disable_unprepare(dwmac->clk_tx);
+
+       return ret;
+}
+
+static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
+{
+       clk_disable_unprepare(dwmac->clk_tx);
+       clk_disable_unprepare(dwmac->clk_rx);
+}
+
+static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
+                                 struct device *dev)
+{
+       struct device_node *np = dev->of_node;
+       int err;
+
+       /*  Get TX/RX clocks */
+       dwmac->clk_tx = devm_clk_get(dev, "mac-clk-tx");
+       if (IS_ERR(dwmac->clk_tx)) {
+               dev_err(dev, "No tx clock provided...\n");
+               return PTR_ERR(dwmac->clk_tx);
+       }
+       dwmac->clk_rx = devm_clk_get(dev, "mac-clk-rx");
+       if (IS_ERR(dwmac->clk_rx)) {
+               dev_err(dev, "No rx clock provided...\n");
+               return PTR_ERR(dwmac->clk_rx);
+       }
+
+       /* Get mode register */
+       dwmac->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon");
+       if (IS_ERR(dwmac->regmap))
+               return PTR_ERR(dwmac->regmap);
+
+       err = of_property_read_u32_index(np, "st,syscon", 1, &dwmac->mode_reg);
+       if (err)
+               dev_err(dev, "Can't get sysconfig mode offset (%d)\n", err);
+
+       return err;
+}
+
+static int stm32_dwmac_probe(struct platform_device *pdev)
+{
+       struct plat_stmmacenet_data *plat_dat;
+       struct stmmac_resources stmmac_res;
+       struct stm32_dwmac *dwmac;
+       int ret;
+
+       ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+       if (ret)
+               return ret;
+
+       plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+       if (IS_ERR(plat_dat))
+               return PTR_ERR(plat_dat);
+
+       dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+       if (!dwmac)
+               return -ENOMEM;
+
+       ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
+       if (ret) {
+               dev_err(&pdev->dev, "Unable to parse OF data\n");
+               return ret;
+       }
+
+       plat_dat->bsp_priv = dwmac;
+
+       ret = stm32_dwmac_init(plat_dat);
+       if (ret)
+               return ret;
+
+       ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+       if (ret)
+               stm32_dwmac_clk_disable(dwmac);
+
+       return ret;
+}
+
+static int stm32_dwmac_remove(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       int ret = stmmac_dvr_remove(&pdev->dev);
+
+       stm32_dwmac_clk_disable(priv->plat->bsp_priv);
+
+       return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_dwmac_suspend(struct device *dev)
+{
+       struct net_device *ndev = dev_get_drvdata(dev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       int ret;
+
+       ret = stmmac_suspend(dev);
+       stm32_dwmac_clk_disable(priv->plat->bsp_priv);
+
+       return ret;
+}
+
+static int stm32_dwmac_resume(struct device *dev)
+{
+       struct net_device *ndev = dev_get_drvdata(dev);
+       struct stmmac_priv *priv = netdev_priv(ndev);
+       int ret;
+
+       ret = stm32_dwmac_init(priv->plat);
+       if (ret)
+               return ret;
+
+       ret = stmmac_resume(dev);
+
+       return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops,
+       stm32_dwmac_suspend, stm32_dwmac_resume);
+
+static const struct of_device_id stm32_dwmac_match[] = {
+       { .compatible = "st,stm32-dwmac"},
+       { }
+};
+MODULE_DEVICE_TABLE(of, stm32_dwmac_match);
+
+static struct platform_driver stm32_dwmac_driver = {
+       .probe  = stm32_dwmac_probe,
+       .remove = stm32_dwmac_remove,
+       .driver = {
+               .name           = "stm32-dwmac",
+               .pm             = &stm32_dwmac_pm_ops,
+               .of_match_table = stm32_dwmac_match,
+       },
+};
+module_platform_driver(stm32_dwmac_driver);
+
+MODULE_AUTHOR("Alexandre Torgue <alexandre.torgue@gmail.com>");
+MODULE_DESCRIPTION("STMicroelectronics MCU DWMAC Specific Glue layer");
+MODULE_LICENSE("GPL v2");
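
stm32_dwmac_parse_data() treats "st,syscon" as a two-cell property: a phandle resolved into a regmap plus a register offset. An illustrative node (a sketch with made-up values, not the authoritative binding from Documentation/devicetree/bindings/net/stm32-dwmac.txt):

    /*
     *      ethernet@40028000 {
     *              compatible = "st,stm32-dwmac", "snps,dwmac-3.50a";
     *              st,syscon = <&syscfg 0x4>;      // illustrative values
     *              clock-names = "mac-clk-tx", "mac-clk-rx";
     *              ...
     *      };
     *
     * Cell 0 (&syscfg) feeds syscon_regmap_lookup_by_phandle(); cell 1
     * (0x4) is read with of_property_read_u32_index(np, "st,syscon", 1,
     * &dwmac->mode_reg) and becomes the offset that regmap_update_bits()
     * targets in stm32_dwmac_init().
     */

The "snps,dwmac-3.50a" fallback compatible is what the stmmac_platform.c hunk further down adds to the list of cores needing the max-frame-size quirk.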
index cbefe9e..6f6bbc5 100644 (file)
@@ -145,7 +145,7 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits,
                numhashregs = 8;
                break;
        default:
-               pr_debug("STMMAC: err in setting mulitcast filter\n");
+               pr_debug("STMMAC: err in setting multicast filter\n");
                return;
                break;
        }
index 756bb54..0a0d6a8 100644 (file)
@@ -265,6 +265,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
         * once needed on other platforms.
         */
        if (of_device_is_compatible(np, "st,spear600-gmac") ||
+               of_device_is_compatible(np, "snps,dwmac-3.50a") ||
                of_device_is_compatible(np, "snps,dwmac-3.70a") ||
                of_device_is_compatible(np, "snps,dwmac")) {
                /* Note that the max-frame-size parameter as defined in the
index 9f159a7..ef26f58 100644 (file)
@@ -1246,7 +1246,7 @@ static int dwceqos_mii_init(struct net_local *lp)
        lp->mii_bus->read  = &dwceqos_mdio_read;
        lp->mii_bus->write = &dwceqos_mdio_write;
        lp->mii_bus->priv = lp;
-       lp->mii_bus->parent = &lp->ndev->dev;
+       lp->mii_bus->parent = &lp->pdev->dev;
 
        of_address_to_resource(lp->pdev->dev.of_node, 0, &res);
        snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%.8llx",
@@ -1622,13 +1622,7 @@ static void dwceqos_init_hw(struct net_local *lp)
                DWCEQOS_MMC_CTRL_RSTONRD);
        dwceqos_enable_mmc_interrupt(lp);
 
-       /* Enable Interrupts */
-       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
-                     DWCEQOS_DMA_CH0_IE_NIE |
-                     DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
-                     DWCEQOS_DMA_CH0_IE_AIE |
-                     DWCEQOS_DMA_CH0_IE_FBEE);
-
+       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, 0);
        dwceqos_write(lp, REG_DWCEQOS_MAC_IE, 0);
 
        dwceqos_write(lp, REG_DWCEQOS_MAC_CFG, DWCEQOS_MAC_CFG_IPC |
@@ -1905,6 +1899,15 @@ static int dwceqos_open(struct net_device *ndev)
        netif_start_queue(ndev);
        tasklet_enable(&lp->tx_bdreclaim_tasklet);
 
+       /* Enable Interrupts -- do this only after we enable NAPI and the
+        * tasklet.
+        */
+       dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
+                     DWCEQOS_DMA_CH0_IE_NIE |
+                     DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
+                     DWCEQOS_DMA_CH0_IE_AIE |
+                     DWCEQOS_DMA_CH0_IE_FBEE);
+
        return 0;
 }
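
Paired with the init_hw hunk above, which now leaves REG_DWCEQOS_DMA_CH0_IE masked, this closes a window where a DMA interrupt could fire before NAPI and the tx-reclaim tasklet exist to service it. The bring-up order, condensed into a hedged sketch with illustrative names:

    #include <linux/interrupt.h>
    #include <linux/netdevice.h>

    struct example_priv {
            struct napi_struct napi;
            struct tasklet_struct tx_reclaim_tasklet;
    };

    static void example_irq_unmask(struct example_priv *lp);  /* hypothetical */

    static int example_open(struct net_device *ndev)
    {
            struct example_priv *lp = netdev_priv(ndev);

            napi_enable(&lp->napi);                   /* 1. rx softirq path ready */
            netif_start_queue(ndev);
            tasklet_enable(&lp->tx_reclaim_tasklet);  /* 2. tx reclaim ready      */

            /* 3. Only now unmask the DMA interrupts: anything that fires
             * from here on has a consumer waiting for it.
             */
            example_irq_unmask(lp);
            return 0;
    }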
 
@@ -2740,7 +2743,7 @@ static void dwceqos_set_msglevel(struct net_device *ndev, u32 msglevel)
        lp->msg_enable = msglevel;
 }
 
-static struct ethtool_ops dwceqos_ethtool_ops = {
+static const struct ethtool_ops dwceqos_ethtool_ops = {
        .get_drvinfo    = dwceqos_get_drvinfo,
        .get_link       = ethtool_op_get_link,
        .get_pauseparam = dwceqos_get_pauseparam,
@@ -2850,25 +2853,17 @@ static int dwceqos_probe(struct platform_device *pdev)
 
        ndev->features = ndev->hw_features;
 
-       netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
-
-       ret = register_netdev(ndev);
-       if (ret) {
-               dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-               goto err_out_clk_dis_aper;
-       }
-
        lp->phy_ref_clk = devm_clk_get(&pdev->dev, "phy_ref_clk");
        if (IS_ERR(lp->phy_ref_clk)) {
                dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
                ret = PTR_ERR(lp->phy_ref_clk);
-               goto err_out_unregister_netdev;
+               goto err_out_clk_dis_aper;
        }
 
        ret = clk_prepare_enable(lp->phy_ref_clk);
        if (ret) {
                dev_err(&pdev->dev, "Unable to enable device clock.\n");
-               goto err_out_unregister_netdev;
+               goto err_out_clk_dis_aper;
        }
 
        lp->phy_node = of_parse_phandle(lp->pdev->dev.of_node,
@@ -2877,7 +2872,7 @@ static int dwceqos_probe(struct platform_device *pdev)
                ret = of_phy_register_fixed_link(lp->pdev->dev.of_node);
                if (ret < 0) {
                        dev_err(&pdev->dev, "invalid fixed-link");
-                       goto err_out_unregister_clk_notifier;
+                       goto err_out_clk_dis_phy;
                }
 
                lp->phy_node = of_node_get(lp->pdev->dev.of_node);
@@ -2886,7 +2881,7 @@ static int dwceqos_probe(struct platform_device *pdev)
        ret = of_get_phy_mode(lp->pdev->dev.of_node);
        if (ret < 0) {
                dev_err(&lp->pdev->dev, "error in getting phy i/f\n");
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
 
        lp->phy_interface = ret;
@@ -2894,14 +2889,14 @@ static int dwceqos_probe(struct platform_device *pdev)
        ret = dwceqos_mii_init(lp);
        if (ret) {
                dev_err(&lp->pdev->dev, "error in dwceqos_mii_init\n");
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
 
        ret = dwceqos_mii_probe(ndev);
        if (ret != 0) {
                netdev_err(ndev, "mii_probe fail.\n");
                ret = -ENXIO;
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
 
        dwceqos_set_umac_addr(lp, lp->ndev->dev_addr, 0);
@@ -2919,7 +2914,7 @@ static int dwceqos_probe(struct platform_device *pdev)
        if (ret) {
                dev_err(&lp->pdev->dev, "Unable to retrieve DT, error %d\n",
                        ret);
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
        dev_info(&lp->pdev->dev, "pdev->id %d, baseaddr 0x%08lx, irq %d\n",
                 pdev->id, ndev->base_addr, ndev->irq);
@@ -2929,18 +2924,24 @@ static int dwceqos_probe(struct platform_device *pdev)
        if (ret) {
                dev_err(&lp->pdev->dev, "Unable to request IRQ %d, error %d\n",
                        ndev->irq, ret);
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
 
        if (netif_msg_probe(lp))
                netdev_dbg(ndev, "net_local@%p\n", lp);
 
+       netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
+
+       ret = register_netdev(ndev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
+               goto err_out_clk_dis_phy;
+       }
+
        return 0;
 
-err_out_unregister_clk_notifier:
+err_out_clk_dis_phy:
        clk_disable_unprepare(lp->phy_ref_clk);
-err_out_unregister_netdev:
-       unregister_netdev(ndev);
 err_out_clk_dis_aper:
        clk_disable_unprepare(lp->apb_pclk);
 err_out_free_netdev:
index 7452b5f..7108c68 100644 (file)
@@ -1987,7 +1987,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if ((readl(nic->regs + FPGA_VER) & 0xFFF) >= 378) {
                err = pci_enable_msi(pdev);
                if (err)
-                       pr_err("Can't eneble msi. error is %d\n", err);
+                       pr_err("Can't enable msi. error is %d\n", err);
                else
                        nic->irq_type = IRQ_MSI;
        } else
index d300d53..fa0cfda 100644 (file)
@@ -546,7 +546,8 @@ fatal_error:
 
 static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       int queue, len;
+       int queue;
+       unsigned int len;
        struct cpmac_desc *desc;
        struct cpmac_priv *priv = netdev_priv(dev);
 
@@ -556,7 +557,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(skb_padto(skb, ETH_ZLEN)))
                return NETDEV_TX_OK;
 
-       len = max(skb->len, ETH_ZLEN);
+       len = max_t(unsigned int, skb->len, ETH_ZLEN);
        queue = skb_get_queue_mapping(skb);
        netif_stop_subqueue(dev, queue);
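
The type change above is not cosmetic: skb->len is unsigned int while ETH_ZLEN is a plain int constant, and the kernel's max() rejects operands of mismatched type at build time. max_t() names one explicit type for both sides. A compile-level illustration:

    #include <linux/if_ether.h>     /* ETH_ZLEN == 60, an int constant */
    #include <linux/kernel.h>       /* max(), max_t()                  */
    #include <linux/skbuff.h>

    static unsigned int padded_len(const struct sk_buff *skb)
    {
            /* max(skb->len, ETH_ZLEN) breaks the build: mismatched types.
             * max_t() casts both operands to the named type first.
             */
            return max_t(unsigned int, skb->len, ETH_ZLEN);
    }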
 
index 421ebda..c6cff3d 100644 (file)
@@ -124,7 +124,7 @@ do {                                                                \
 
 #define RX_PRIORITY_MAPPING    0x76543210
 #define TX_PRIORITY_MAPPING    0x33221100
-#define CPDMA_TX_PRIORITY_MAP  0x76543210
+#define CPDMA_TX_PRIORITY_MAP  0x01234567
 
 #define CPSW_VLAN_AWARE                BIT(1)
 #define CPSW_ALE_VLAN_AWARE    1
@@ -144,6 +144,7 @@ do {                                                                \
                ((cpsw->data.dual_emac) ? priv->emac_port :     \
                cpsw->data.active_slave)
 #define IRQ_NUM                        2
+#define CPSW_MAX_QUEUES                8
 
 static int debug_level;
 module_param(debug_level, int, 0);
@@ -379,13 +380,15 @@ struct cpsw_common {
        int                             rx_packet_max;
        struct cpsw_slave               *slaves;
        struct cpdma_ctlr               *dma;
-       struct cpdma_chan               *txch, *rxch;
+       struct cpdma_chan               *txch[CPSW_MAX_QUEUES];
+       struct cpdma_chan               *rxch[CPSW_MAX_QUEUES];
        struct cpsw_ale                 *ale;
        bool                            quirk_irq;
        bool                            rx_irq_disabled;
        bool                            tx_irq_disabled;
        u32 irqs_table[IRQ_NUM];
        struct cpts                     *cpts;
+       int                             rx_ch_num, tx_ch_num;
 };
 
 struct cpsw_priv {
@@ -457,35 +460,26 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = {
        { "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) },
        { "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) },
        { "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) },
-       { "Rx DMA chan: head_enqueue", CPDMA_RX_STAT(head_enqueue) },
-       { "Rx DMA chan: tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
-       { "Rx DMA chan: pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
-       { "Rx DMA chan: misqueued", CPDMA_RX_STAT(misqueued) },
-       { "Rx DMA chan: desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
-       { "Rx DMA chan: pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
-       { "Rx DMA chan: runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
-       { "Rx DMA chan: runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
-       { "Rx DMA chan: empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
-       { "Rx DMA chan: busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
-       { "Rx DMA chan: good_dequeue", CPDMA_RX_STAT(good_dequeue) },
-       { "Rx DMA chan: requeue", CPDMA_RX_STAT(requeue) },
-       { "Rx DMA chan: teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
-       { "Tx DMA chan: head_enqueue", CPDMA_TX_STAT(head_enqueue) },
-       { "Tx DMA chan: tail_enqueue", CPDMA_TX_STAT(tail_enqueue) },
-       { "Tx DMA chan: pad_enqueue", CPDMA_TX_STAT(pad_enqueue) },
-       { "Tx DMA chan: misqueued", CPDMA_TX_STAT(misqueued) },
-       { "Tx DMA chan: desc_alloc_fail", CPDMA_TX_STAT(desc_alloc_fail) },
-       { "Tx DMA chan: pad_alloc_fail", CPDMA_TX_STAT(pad_alloc_fail) },
-       { "Tx DMA chan: runt_receive_buf", CPDMA_TX_STAT(runt_receive_buff) },
-       { "Tx DMA chan: runt_transmit_buf", CPDMA_TX_STAT(runt_transmit_buff) },
-       { "Tx DMA chan: empty_dequeue", CPDMA_TX_STAT(empty_dequeue) },
-       { "Tx DMA chan: busy_dequeue", CPDMA_TX_STAT(busy_dequeue) },
-       { "Tx DMA chan: good_dequeue", CPDMA_TX_STAT(good_dequeue) },
-       { "Tx DMA chan: requeue", CPDMA_TX_STAT(requeue) },
-       { "Tx DMA chan: teardown_dequeue", CPDMA_TX_STAT(teardown_dequeue) },
 };
 
-#define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats)
+static const struct cpsw_stats cpsw_gstrings_ch_stats[] = {
+       { "head_enqueue", CPDMA_RX_STAT(head_enqueue) },
+       { "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
+       { "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
+       { "misqueued", CPDMA_RX_STAT(misqueued) },
+       { "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
+       { "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
+       { "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
+       { "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
+       { "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
+       { "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
+       { "good_dequeue", CPDMA_RX_STAT(good_dequeue) },
+       { "requeue", CPDMA_RX_STAT(requeue) },
+       { "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
+};
+
+#define CPSW_STATS_COMMON_LEN  ARRAY_SIZE(cpsw_gstrings_stats)
+#define CPSW_STATS_CH_LEN      ARRAY_SIZE(cpsw_gstrings_ch_stats)
 
 #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
 #define napi_to_cpsw(napi)     container_of(napi, struct cpsw_common, napi)
@@ -669,6 +663,7 @@ static void cpsw_intr_disable(struct cpsw_common *cpsw)
 
 static void cpsw_tx_handler(void *token, int len, int status)
 {
+       struct netdev_queue     *txq;
        struct sk_buff          *skb = token;
        struct net_device       *ndev = skb->dev;
        struct cpsw_common      *cpsw = ndev_to_cpsw(ndev);
@@ -676,8 +671,10 @@ static void cpsw_tx_handler(void *token, int len, int status)
        /* Check whether the queue is stopped due to stalled tx dma, if the
         * queue is stopped then start the queue as we have free desc for tx
         */
-       if (unlikely(netif_queue_stopped(ndev)))
-               netif_wake_queue(ndev);
+       txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
+       if (unlikely(netif_tx_queue_stopped(txq)))
+               netif_tx_wake_queue(txq);
+
        cpts_tx_timestamp(cpsw->cpts, skb);
        ndev->stats.tx_packets++;
        ndev->stats.tx_bytes += len;
@@ -686,6 +683,7 @@ static void cpsw_tx_handler(void *token, int len, int status)
 
 static void cpsw_rx_handler(void *token, int len, int status)
 {
+       struct cpdma_chan       *ch;
        struct sk_buff          *skb = token;
        struct sk_buff          *new_skb;
        struct net_device       *ndev = skb->dev;
@@ -724,6 +722,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
 
        new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
        if (new_skb) {
+               skb_copy_queue_mapping(new_skb, skb);
                skb_put(skb, len);
                cpts_rx_timestamp(cpsw->cpts, skb);
                skb->protocol = eth_type_trans(skb, ndev);
@@ -737,7 +736,13 @@ static void cpsw_rx_handler(void *token, int len, int status)
        }
 
 requeue:
-       ret = cpdma_chan_submit(cpsw->rxch, new_skb, new_skb->data,
+       if (netif_dormant(ndev)) {
+               dev_kfree_skb_any(new_skb);
+               return;
+       }
+
+       ch = cpsw->rxch[skb_get_queue_mapping(new_skb)];
+       ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
                                skb_tailroom(new_skb), 0);
        if (WARN_ON(ret < 0))
                dev_kfree_skb_any(new_skb);
@@ -777,10 +782,27 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
+       u32                     ch_map;
+       int                     num_tx, ch;
        struct cpsw_common      *cpsw = napi_to_cpsw(napi_tx);
-       int                     num_tx;
 
-       num_tx = cpdma_chan_process(cpsw->txch, budget);
+       /* process every unprocessed channel */
+       ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
+       for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) {
+               if (!ch_map) {
+                       ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
+                       if (!ch_map)
+                               break;
+
+                       ch = 0;
+               }
+
+               if (!(ch_map & 0x01))
+                       continue;
+
+               num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx);
+       }
+
        if (num_tx < budget) {
                napi_complete(napi_tx);
                writel(0xff, &cpsw->wr_regs->tx_en);
@@ -795,10 +817,27 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
+       u32                     ch_map;
+       int                     num_rx, ch;
        struct cpsw_common      *cpsw = napi_to_cpsw(napi_rx);
-       int                     num_rx;
 
-       num_rx = cpdma_chan_process(cpsw->rxch, budget);
+       /* process every unprocessed channel */
+       ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
+       for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) {
+               if (!ch_map) {
+                       ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
+                       if (!ch_map)
+                               break;
+
+                       ch = 0;
+               }
+
+               if (!(ch_map & 0x01))
+                       continue;
+
+               num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx);
+       }
+
        if (num_rx < budget) {
                napi_complete(napi_rx);
                writel(0xff, &cpsw->wr_regs->rx_en);
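
The tx and rx poll loops above now share one shape: walk a bitmask of channels with pending work, spend the NAPI budget across them, and re-read the mask when it runs out so channels that became ready mid-poll are served in the same pass. The generic form, with hypothetical stand-ins for the cpdma helpers:

    #include <linux/types.h>

    struct ctrl;
    struct chan;

    u32 pending_state(struct ctrl *dma);          /* one bit per ready channel */
    int chan_process(struct chan *ch, int quota);

    static int poll_channels(struct ctrl *dma, struct chan **chans, int budget)
    {
            u32 ch_map = pending_state(dma);
            int ch = 0, done = 0;

            while (done < budget) {
                    if (!ch_map) {
                            ch_map = pending_state(dma);  /* refresh the mask */
                            if (!ch_map)
                                    break;                /* hardware drained */
                            ch = 0;
                    }
                    if (ch_map & 0x01)
                            done += chan_process(chans[ch], budget - done);
                    ch_map >>= 1;
                    ch++;
            }
            return done;    /* done < budget lets the caller napi_complete() */
    }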
@@ -897,10 +936,10 @@ static void cpsw_adjust_link(struct net_device *ndev)
        if (link) {
                netif_carrier_on(ndev);
                if (netif_running(ndev))
-                       netif_wake_queue(ndev);
+                       netif_tx_wake_all_queues(ndev);
        } else {
                netif_carrier_off(ndev);
-               netif_stop_queue(ndev);
+               netif_tx_stop_all_queues(ndev);
        }
 }
 
@@ -973,26 +1012,51 @@ update_return:
 
 static int cpsw_get_sset_count(struct net_device *ndev, int sset)
 {
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
        switch (sset) {
        case ETH_SS_STATS:
-               return CPSW_STATS_LEN;
+               return (CPSW_STATS_COMMON_LEN +
+                      (cpsw->rx_ch_num + cpsw->tx_ch_num) *
+                      CPSW_STATS_CH_LEN);
        default:
                return -EOPNOTSUPP;
        }
 }
 
+static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
+{
+       int ch_stats_len;
+       int line;
+       int i;
+
+       ch_stats_len = CPSW_STATS_CH_LEN * ch_num;
+       for (i = 0; i < ch_stats_len; i++) {
+               line = i % CPSW_STATS_CH_LEN;
+               snprintf(*p, ETH_GSTRING_LEN,
+                        "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx",
+                        i / CPSW_STATS_CH_LEN,
+                        cpsw_gstrings_ch_stats[line].stat_string);
+               *p += ETH_GSTRING_LEN;
+       }
+}
+
 static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
        u8 *p = data;
        int i;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               for (i = 0; i < CPSW_STATS_LEN; i++) {
+               for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) {
                        memcpy(p, cpsw_gstrings_stats[i].stat_string,
                               ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
+
+               cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1);
+               cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0);
                break;
        }
 }
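
cpsw_get_sset_count() and cpsw_get_strings() above, together with cpsw_get_ethtool_stats() in the next hunk, are three views of one layout and must enumerate identically: the common block first, then one CPSW_STATS_CH_LEN block per rx channel, then per tx channel. The count reduces to a simple product:

    /* e.g. 2 rx + 3 tx channels:
     *   CPSW_STATS_COMMON_LEN + (2 + 3) * CPSW_STATS_CH_LEN
     */
    static int example_sset_count(int rx_ch_num, int tx_ch_num)
    {
            return CPSW_STATS_COMMON_LEN +
                   (rx_ch_num + tx_ch_num) * CPSW_STATS_CH_LEN;
    }

If the three callbacks ever disagree, ethtool -S prints shifted or truncated values rather than failing loudly.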
@@ -1000,36 +1064,31 @@ static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 static void cpsw_get_ethtool_stats(struct net_device *ndev,
                                    struct ethtool_stats *stats, u64 *data)
 {
-       struct cpdma_chan_stats rx_stats;
-       struct cpdma_chan_stats tx_stats;
-       u32 val;
        u8 *p;
-       int i;
        struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+       struct cpdma_chan_stats ch_stats;
+       int i, l, ch;
 
        /* Collect Davinci CPDMA stats for Rx and Tx Channel */
-       cpdma_chan_get_stats(cpsw->rxch, &rx_stats);
-       cpdma_chan_get_stats(cpsw->txch, &tx_stats);
-
-       for (i = 0; i < CPSW_STATS_LEN; i++) {
-               switch (cpsw_gstrings_stats[i].type) {
-               case CPSW_STATS:
-                       val = readl(cpsw->hw_stats +
-                                   cpsw_gstrings_stats[i].stat_offset);
-                       data[i] = val;
-                       break;
-
-               case CPDMA_RX_STATS:
-                       p = (u8 *)&rx_stats +
-                               cpsw_gstrings_stats[i].stat_offset;
-                       data[i] = *(u32 *)p;
-                       break;
+       for (l = 0; l < CPSW_STATS_COMMON_LEN; l++)
+               data[l] = readl(cpsw->hw_stats +
+                               cpsw_gstrings_stats[l].stat_offset);
+
+       for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+               cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats);
+               for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+                       p = (u8 *)&ch_stats +
+                               cpsw_gstrings_ch_stats[i].stat_offset;
+                       data[l] = *(u32 *)p;
+               }
+       }
 
-               case CPDMA_TX_STATS:
-                       p = (u8 *)&tx_stats +
-                               cpsw_gstrings_stats[i].stat_offset;
-                       data[i] = *(u32 *)p;
-                       break;
+       for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
+               cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats);
+               for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+                       p = (u8 *)&ch_stats +
+                               cpsw_gstrings_ch_stats[i].stat_offset;
+                       data[l] = *(u32 *)p;
                }
        }
 }
@@ -1050,11 +1109,12 @@ static int cpsw_common_res_usage_state(struct cpsw_common *cpsw)
 }
 
 static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
-                                       struct sk_buff *skb)
+                                       struct sk_buff *skb,
+                                       struct cpdma_chan *txch)
 {
        struct cpsw_common *cpsw = priv->cpsw;
 
-       return cpdma_chan_submit(cpsw->txch, skb, skb->data, skb->len,
+       return cpdma_chan_submit(txch, skb, skb->data, skb->len,
                                 priv->emac_port + cpsw->data.dual_emac);
 }
 
@@ -1213,6 +1273,44 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
        }
 }
 
+static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
+{
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct sk_buff *skb;
+       int ch_buf_num;
+       int ch, i, ret;
+
+       for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+               ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]);
+               for (i = 0; i < ch_buf_num; i++) {
+                       skb = __netdev_alloc_skb_ip_align(priv->ndev,
+                                                         cpsw->rx_packet_max,
+                                                         GFP_KERNEL);
+                       if (!skb) {
+                               cpsw_err(priv, ifup, "cannot allocate skb\n");
+                               return -ENOMEM;
+                       }
+
+                       skb_set_queue_mapping(skb, ch);
+                       ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data,
+                                               skb_tailroom(skb), 0);
+                       if (ret < 0) {
+                               cpsw_err(priv, ifup,
+                                        "cannot submit skb to channel %d rx, error %d\n",
+                                        ch, ret);
+                               kfree_skb(skb);
+                               return ret;
+                       }
+                       kmemleak_not_leak(skb);
+               }
+
+               cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n",
+                         ch, ch_buf_num);
+       }
+
+       return 0;
+}
+
 static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 {
        u32 slave_port;
@@ -1233,7 +1331,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        struct cpsw_common *cpsw = priv->cpsw;
-       int i, ret;
+       int ret;
        u32 reg;
 
        ret = pm_runtime_get_sync(cpsw->dev);
@@ -1246,6 +1344,19 @@ static int cpsw_ndo_open(struct net_device *ndev)
                cpsw_intr_disable(cpsw);
        netif_carrier_off(ndev);
 
+       /* Notify the stack of the actual queue counts. */
+       ret = netif_set_real_num_tx_queues(ndev, cpsw->tx_ch_num);
+       if (ret) {
+               dev_err(priv->dev, "cannot set real number of tx queues\n");
+               goto err_cleanup;
+       }
+
+       ret = netif_set_real_num_rx_queues(ndev, cpsw->rx_ch_num);
+       if (ret) {
+               dev_err(priv->dev, "cannot set real number of rx queues\n");
+               goto err_cleanup;
+       }
+
        reg = cpsw->version;
 
        dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n",
@@ -1265,8 +1376,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
                                  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
        if (!cpsw_common_res_usage_state(cpsw)) {
-               int buf_num;
-
                /* setup tx dma to fixed prio and zero offset */
                cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
                cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
@@ -1293,27 +1402,9 @@ static int cpsw_ndo_open(struct net_device *ndev)
                        enable_irq(cpsw->irqs_table[0]);
                }
 
-               buf_num = cpdma_chan_get_rx_buf_num(cpsw->dma);
-               for (i = 0; i < buf_num; i++) {
-                       struct sk_buff *skb;
-
-                       ret = -ENOMEM;
-                       skb = __netdev_alloc_skb_ip_align(priv->ndev,
-                                       cpsw->rx_packet_max, GFP_KERNEL);
-                       if (!skb)
-                               goto err_cleanup;
-                       ret = cpdma_chan_submit(cpsw->rxch, skb, skb->data,
-                                               skb_tailroom(skb), 0);
-                       if (ret < 0) {
-                               kfree_skb(skb);
-                               goto err_cleanup;
-                       }
-                       kmemleak_not_leak(skb);
-               }
-               /* continue even if we didn't manage to submit all
-                * receive descs
-                */
-               cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
+               ret = cpsw_fill_rx_channels(priv);
+               if (ret < 0)
+                       goto err_cleanup;
 
                if (cpts_register(cpsw->dev, cpsw->cpts,
                                  cpsw->data.cpts_clock_mult,
@@ -1335,6 +1426,9 @@ static int cpsw_ndo_open(struct net_device *ndev)
 
        if (cpsw->data.dual_emac)
                cpsw->slaves[priv->emac_port].open_stat = true;
+
+       netif_tx_start_all_queues(ndev);
+
        return 0;
 
 err_cleanup:
@@ -1351,7 +1445,7 @@ static int cpsw_ndo_stop(struct net_device *ndev)
        struct cpsw_common *cpsw = priv->cpsw;
 
        cpsw_info(priv, ifdown, "shutting down cpsw device\n");
-       netif_stop_queue(priv->ndev);
+       netif_tx_stop_all_queues(priv->ndev);
        netif_carrier_off(priv->ndev);
 
        if (cpsw_common_res_usage_state(cpsw) <= 1) {
@@ -1373,8 +1467,10 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
                                       struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
-       int ret;
        struct cpsw_common *cpsw = priv->cpsw;
+       struct netdev_queue *txq;
+       struct cpdma_chan *txch;
+       int ret, q_idx;
 
        netif_trans_update(ndev);
 
@@ -1390,7 +1486,12 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 
        skb_tx_timestamp(skb);
 
-       ret = cpsw_tx_packet_submit(priv, skb);
+       q_idx = skb_get_queue_mapping(skb);
+       if (q_idx >= cpsw->tx_ch_num)
+               q_idx = q_idx % cpsw->tx_ch_num;
+
+       txch = cpsw->txch[q_idx];
+       ret = cpsw_tx_packet_submit(priv, skb, txch);
        if (unlikely(ret != 0)) {
                cpsw_err(priv, tx_err, "desc submit failed\n");
                goto fail;
@@ -1399,13 +1500,16 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        /* If there is no more tx desc left free then we need to
         * tell the kernel to stop sending us tx frames.
         */
-       if (unlikely(!cpdma_check_free_tx_desc(cpsw->txch)))
-               netif_stop_queue(ndev);
+       if (unlikely(!cpdma_check_free_tx_desc(txch))) {
+               txq = netdev_get_tx_queue(ndev, q_idx);
+               netif_tx_stop_queue(txq);
+       }
 
        return NETDEV_TX_OK;
 fail:
        ndev->stats.tx_dropped++;
-       netif_stop_queue(ndev);
+       txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
+       netif_tx_stop_queue(txq);
        return NETDEV_TX_BUSY;
 }
 
@@ -1587,12 +1691,16 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        struct cpsw_common *cpsw = priv->cpsw;
+       int ch;
 
        cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n");
        ndev->stats.tx_errors++;
        cpsw_intr_disable(cpsw);
-       cpdma_chan_stop(cpsw->txch);
-       cpdma_chan_start(cpsw->txch);
+       for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
+               cpdma_chan_stop(cpsw->txch[ch]);
+               cpdma_chan_start(cpsw->txch[ch]);
+       }
+
        cpsw_intr_enable(cpsw);
 }
 
@@ -1957,6 +2065,180 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev)
                cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
 }
 
+static void cpsw_get_channels(struct net_device *ndev,
+                             struct ethtool_channels *ch)
+{
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+       ch->max_combined = 0;
+       ch->max_rx = CPSW_MAX_QUEUES;
+       ch->max_tx = CPSW_MAX_QUEUES;
+       ch->max_other = 0;
+       ch->other_count = 0;
+       ch->rx_count = cpsw->rx_ch_num;
+       ch->tx_count = cpsw->tx_ch_num;
+       ch->combined_count = 0;
+}
+
+static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
+                                 struct ethtool_channels *ch)
+{
+       if (ch->combined_count)
+               return -EINVAL;
+
+       /* verify we have at least one channel in each direction */
+       if (!ch->rx_count || !ch->tx_count)
+               return -EINVAL;
+
+       if (ch->rx_count > cpsw->data.channels ||
+           ch->tx_count > cpsw->data.channels)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
+{
+       int (*poll)(struct napi_struct *, int);
+       struct cpsw_common *cpsw = priv->cpsw;
+       void (*handler)(void *, int, int);
+       struct cpdma_chan **chan;
+       int ret, *ch;
+
+       if (rx) {
+               ch = &cpsw->rx_ch_num;
+               chan = cpsw->rxch;
+               handler = cpsw_rx_handler;
+               poll = cpsw_rx_poll;
+       } else {
+               ch = &cpsw->tx_ch_num;
+               chan = cpsw->txch;
+               handler = cpsw_tx_handler;
+               poll = cpsw_tx_poll;
+       }
+
+       while (*ch < ch_num) {
+               chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+
+               if (IS_ERR(chan[*ch]))
+                       return PTR_ERR(chan[*ch]);
+
+               if (!chan[*ch])
+                       return -EINVAL;
+
+               cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
+                         (rx ? "rx" : "tx"));
+               (*ch)++;
+       }
+
+       while (*ch > ch_num) {
+               (*ch)--;
+
+               ret = cpdma_chan_destroy(chan[*ch]);
+               if (ret)
+                       return ret;
+
+               cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch,
+                         (rx ? "rx" : "tx"));
+       }
+
+       return 0;
+}
+
+static int cpsw_update_channels(struct cpsw_priv *priv,
+                               struct ethtool_channels *ch)
+{
+       int ret;
+
+       ret = cpsw_update_channels_res(priv, ch->rx_count, 1);
+       if (ret)
+               return ret;
+
+       ret = cpsw_update_channels_res(priv, ch->tx_count, 0);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int cpsw_set_channels(struct net_device *ndev,
+                            struct ethtool_channels *chs)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       int i, ret;
+
+       ret = cpsw_check_ch_settings(cpsw, chs);
+       if (ret < 0)
+               return ret;
+
+       /* Disable NAPI scheduling */
+       cpsw_intr_disable(cpsw);
+
+       /* Stop all transmit queues for every network device.
+        * Disable re-using rx descriptors with dormant_on.
+        */
+       for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+               if (!(slave->ndev && netif_running(slave->ndev)))
+                       continue;
+
+               netif_tx_stop_all_queues(slave->ndev);
+               netif_dormant_on(slave->ndev);
+       }
+
+       /* Handle rest of tx packets and stop cpdma channels */
+       cpdma_ctlr_stop(cpsw->dma);
+       ret = cpsw_update_channels(priv, chs);
+       if (ret)
+               goto err;
+
+       for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+               if (!(slave->ndev && netif_running(slave->ndev)))
+                       continue;
+
+               /* Inform stack about new count of queues */
+               ret = netif_set_real_num_tx_queues(slave->ndev,
+                                                  cpsw->tx_ch_num);
+               if (ret) {
+                       dev_err(priv->dev, "cannot set real number of tx queues\n");
+                       goto err;
+               }
+
+               ret = netif_set_real_num_rx_queues(slave->ndev,
+                                                  cpsw->rx_ch_num);
+               if (ret) {
+                       dev_err(priv->dev, "cannot set real number of rx queues\n");
+                       goto err;
+               }
+
+               /* Enable rx packets handling */
+               netif_dormant_off(slave->ndev);
+       }
+
+       if (cpsw_common_res_usage_state(cpsw)) {
+               ret = cpsw_fill_rx_channels(priv);
+               if (ret)
+                       goto err;
+
+               /* After this, receive is started */
+               cpdma_ctlr_start(cpsw->dma);
+               cpsw_intr_enable(cpsw);
+       }
+
+       /* Resume transmit for every affected interface */
+       for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+               if (!(slave->ndev && netif_running(slave->ndev)))
+                       continue;
+               netif_tx_start_all_queues(slave->ndev);
+       }
+       return 0;
+err:
+       dev_err(priv->dev, "cannot update channels number, closing device\n");
+       dev_close(ndev);
+       return ret;
+}
+
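
cpsw_set_channels() above is a quiesce/reconfigure/resume protocol; from userspace it is driven by ethtool -L, e.g. "ethtool -L eth0 rx 4 tx 4". The sequence, condensed (a descriptive step list, not new API):

    /*
     * 1. cpsw_intr_disable()             -- stop NAPI scheduling
     * 2. netif_tx_stop_all_queues(),
     *    netif_dormant_on()              -- quiesce tx, stop rx requeueing
     * 3. cpdma_ctlr_stop()               -- drain and halt DMA channels
     * 4. cpsw_update_channels()          -- create/destroy cpdma channels
     * 5. netif_set_real_num_*_queues()   -- tell the stack the new counts
     * 6. netif_dormant_off()             -- rx handler may requeue again
     * 7. cpsw_fill_rx_channels(),
     *    cpdma_ctlr_start(),
     *    cpsw_intr_enable()              -- refill buffers, restart receive
     * 8. netif_tx_start_all_queues()     -- resume transmit
     *
     * Any failure funnels to dev_close(): taking the link down is safer
     * than running on half-reconfigured DMA channels.
     */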
 static const struct ethtool_ops cpsw_ethtool_ops = {
        .get_drvinfo    = cpsw_get_drvinfo,
        .get_msglevel   = cpsw_get_msglevel,
@@ -1978,6 +2260,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
        .get_regs       = cpsw_get_regs,
        .begin          = cpsw_ethtool_op_begin,
        .complete       = cpsw_ethtool_op_complete,
+       .get_channels   = cpsw_get_channels,
+       .set_channels   = cpsw_set_channels,
 };
 
 static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
@@ -2164,7 +2448,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
        struct cpsw_priv                *priv_sl2;
        int ret = 0;
 
-       ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+       ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
        if (!ndev) {
                dev_err(cpsw->dev, "cpsw: error allocating net_device\n");
                return -ENOMEM;
@@ -2265,7 +2549,7 @@ static int cpsw_probe(struct platform_device *pdev)
        cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
        cpsw->dev = &pdev->dev;
 
-       ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+       ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
        if (!ndev) {
                dev_err(&pdev->dev, "error allocating net_device\n");
                return -ENOMEM;
@@ -2306,6 +2590,8 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_runtime_disable_ret;
        }
        data = &cpsw->data;
+       cpsw->rx_ch_num = 1;
+       cpsw->tx_ch_num = 1;
 
        if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
                memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
@@ -2430,12 +2716,9 @@ static int cpsw_probe(struct platform_device *pdev)
                goto clean_runtime_disable_ret;
        }
 
-       cpsw->txch = cpdma_chan_create(cpsw->dma, tx_chan_num(0),
-                                      cpsw_tx_handler);
-       cpsw->rxch = cpdma_chan_create(cpsw->dma, rx_chan_num(0),
-                                      cpsw_rx_handler);
-
-       if (WARN_ON(!cpsw->txch || !cpsw->rxch)) {
+       cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+       cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
+       if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) {
                dev_err(priv->dev, "error initializing dma channels\n");
                ret = -ENOMEM;
                goto clean_dma_ret;
index cf72b33..c3f35f1 100644 (file)
@@ -104,6 +104,7 @@ struct cpdma_ctlr {
        struct cpdma_desc_pool  *pool;
        spinlock_t              lock;
        struct cpdma_chan       *channels[2 * CPDMA_MAX_CHANNELS];
+       int chan_num;
 };
 
 struct cpdma_chan {
@@ -123,6 +124,13 @@ struct cpdma_chan {
        int     int_set, int_clear, td;
 };
 
+#define tx_chan_num(chan)      (chan)
+#define rx_chan_num(chan)      ((chan) + CPDMA_MAX_CHANNELS)
+#define is_rx_chan(chan)       ((chan)->chan_num >= CPDMA_MAX_CHANNELS)
+#define is_tx_chan(chan)       (!is_rx_chan(chan))
+#define __chan_linear(chan_num)        ((chan_num) & (CPDMA_MAX_CHANNELS - 1))
+#define chan_linear(chan)      __chan_linear((chan)->chan_num)
+
 /* The following make access to common cpdma_ctlr params more readable */
 #define dmaregs                params.dmaregs
 #define num_chan       params.num_chan
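
The macros moved in from davinci_cpdma.h encode direction into the channel number so a single channels[2 * CPDMA_MAX_CHANNELS] array in cpdma_ctlr can hold both directions. A worked example, assuming CPDMA_MAX_CHANNELS == 32 (BITS_PER_LONG on a 32-bit build):

    /*
     * tx_chan_num(2)    == 2    -- tx channels occupy slots 0..31
     * rx_chan_num(2)    == 34   -- rx channels occupy slots 32..63
     * is_rx_chan(ch)            -- true when chan->chan_num >= 32
     * __chan_linear(34) == 2    -- masks back to the hardware index
     */

This is why cpdma_chan_create() below can take a bare hardware index plus an rx_type flag instead of a pre-encoded channel number.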
@@ -256,6 +264,7 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
        ctlr->state = CPDMA_STATE_IDLE;
        ctlr->params = *params;
        ctlr->dev = params->dev;
+       ctlr->chan_num = 0;
        spin_lock_init(&ctlr->lock);
 
        ctlr->pool = cpdma_desc_pool_create(ctlr->dev,
@@ -332,12 +341,14 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr)
        }
 
        ctlr->state = CPDMA_STATE_TEARDOWN;
+       spin_unlock_irqrestore(&ctlr->lock, flags);
 
        for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
                if (ctlr->channels[i])
                        cpdma_chan_stop(ctlr->channels[i]);
        }
 
+       spin_lock_irqsave(&ctlr->lock, flags);
        dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff);
        dma_reg_write(ctlr, CPDMA_TXINTMASKCLEAR, 0xffffffff);
 
@@ -399,13 +410,52 @@ void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value)
 }
 EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi);
 
+u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr)
+{
+       return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED);
+}
+EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state);
+
+u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr)
+{
+       return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED);
+}
+EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state);
+
+/**
+ * cpdma_chan_split_pool - splits the controller descriptor pool evenly
+ * between all channels. Has to be called under the ctlr lock.
+ */
+static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+{
+       struct cpdma_desc_pool *pool = ctlr->pool;
+       struct cpdma_chan *chan;
+       int ch_desc_num;
+       int i;
+
+       if (!ctlr->chan_num)
+               return;
+
+       /* calculate average size of pool slice */
+       ch_desc_num = pool->num_desc / ctlr->chan_num;
+
+       /* split ctlr pool */
+       for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
+               chan = ctlr->channels[i];
+               if (chan)
+                       chan->desc_num = ch_desc_num;
+       }
+}
+
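
The split is a plain integer division, redone on every channel create and destroy. A stand-alone illustration of the arithmetic (hypothetical pool size, userspace C for brevity):

    #include <stdio.h>

    int main(void)
    {
            int num_desc = 256;     /* hypothetical pool size */

            for (int chan_num = 1; chan_num <= 4; chan_num++)
                    printf("%d channels -> %d descriptors each\n",
                           chan_num, num_desc / chan_num);
            return 0;       /* 256, 128, 85, 64 -- remainders stay idle */
    }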
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
-                                    cpdma_handler_fn handler)
+                                    cpdma_handler_fn handler, int rx_type)
 {
+       int offset = chan_num * 4;
        struct cpdma_chan *chan;
-       int offset = (chan_num % CPDMA_MAX_CHANNELS) * 4;
        unsigned long flags;
 
+       chan_num = rx_type ? rx_chan_num(chan_num) : tx_chan_num(chan_num);
+
        if (__chan_linear(chan_num) >= ctlr->num_chan)
                return NULL;
 
@@ -447,14 +497,25 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
        spin_lock_init(&chan->lock);
 
        ctlr->channels[chan_num] = chan;
+       ctlr->chan_num++;
+
+       cpdma_chan_split_pool(ctlr);
+
        spin_unlock_irqrestore(&ctlr->lock, flags);
        return chan;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_create);
 
-int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr)
+int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan)
 {
-       return ctlr->pool->num_desc / 2;
+       unsigned long flags;
+       int desc_num;
+
+       spin_lock_irqsave(&chan->lock, flags);
+       desc_num = chan->desc_num;
+       spin_unlock_irqrestore(&chan->lock, flags);
+
+       return desc_num;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num);
 
@@ -471,6 +532,10 @@ int cpdma_chan_destroy(struct cpdma_chan *chan)
        if (chan->state != CPDMA_STATE_IDLE)
                cpdma_chan_stop(chan);
        ctlr->channels[chan->chan_num] = NULL;
+       ctlr->chan_num--;
+
+       cpdma_chan_split_pool(ctlr);
+
        spin_unlock_irqrestore(&ctlr->lock, flags);
        return 0;
 }
index 4b46cd6..a07b22b 100644
 
 #define CPDMA_MAX_CHANNELS     BITS_PER_LONG
 
-#define tx_chan_num(chan)      (chan)
-#define rx_chan_num(chan)      ((chan) + CPDMA_MAX_CHANNELS)
-#define is_rx_chan(chan)       ((chan)->chan_num >= CPDMA_MAX_CHANNELS)
-#define is_tx_chan(chan)       (!is_rx_chan(chan))
-#define __chan_linear(chan_num)        ((chan_num) & (CPDMA_MAX_CHANNELS - 1))
-#define chan_linear(chan)      __chan_linear((chan)->chan_num)
-
 #define CPDMA_RX_SOURCE_PORT(__status__)       ((__status__ >> 16) & 0x7)
 
 #define CPDMA_EOI_RX_THRESH    0x0
@@ -79,8 +72,8 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr);
 int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr);
 
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
-                                    cpdma_handler_fn handler);
-int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr);
+                                    cpdma_handler_fn handler, int rx_type);
+int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan);
 int cpdma_chan_destroy(struct cpdma_chan *chan);
 int cpdma_chan_start(struct cpdma_chan *chan);
 int cpdma_chan_stop(struct cpdma_chan *chan);
@@ -94,6 +87,8 @@ int cpdma_chan_process(struct cpdma_chan *chan, int quota);
 int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable);
 void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value);
 int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable);
+u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr);
+u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr);
 bool cpdma_check_free_tx_desc(struct cpdma_chan *chan);
 
 enum cpdma_control {
index 2d6fc9a..2fd94a5 100644
@@ -1870,10 +1870,10 @@ static int davinci_emac_probe(struct platform_device *pdev)
                goto no_pdata;
        }
 
-       priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH),
-                                      emac_tx_handler);
-       priv->rxchan = cpdma_chan_create(priv->dma, rx_chan_num(EMAC_DEF_RX_CH),
-                                      emac_rx_handler);
+       priv->txchan = cpdma_chan_create(priv->dma, EMAC_DEF_TX_CH,
+                                        emac_tx_handler, 0);
+       priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH,
+                                        emac_rx_handler, 1);
        if (WARN_ON(!priv->txchan || !priv->rxchan)) {
                rc = -ENOMEM;
                goto no_cpdma_chan;
index 79f0ec4..bc258d7 100644
@@ -1791,7 +1791,7 @@ fail_alloc_rx:
        gelic_card_free_chain(card, card->tx_chain.head);
 fail_alloc_tx:
        free_irq(card->irq, card);
-       netdev->irq = NO_IRQ;
+       netdev->irq = 0;
 fail_request_irq:
        ps3_sb_event_receive_port_destroy(dev, card->irq);
 fail_alloc_irq:
@@ -1843,7 +1843,7 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev)
        netdev0 = card->netdev[GELIC_PORT_ETHERNET_0];
        /* disconnect event port */
        free_irq(card->irq, card);
-       netdev0->irq = NO_IRQ;
+       netdev0->irq = 0;
        ps3_sb_event_receive_port_destroy(card->dev, card->irq);
 
        wait_event(card->waitq,
index f38696c..908e72e 100644
@@ -1724,24 +1724,21 @@ static void velocity_free_tx_buf(struct velocity_info *vptr,
                struct velocity_td_info *tdinfo, struct tx_desc *td)
 {
        struct sk_buff *skb = tdinfo->skb;
+       int i;
 
        /*
         *      Don't unmap the pre-allocated tx_bufs
         */
-       if (tdinfo->skb_dma) {
-               int i;
+       for (i = 0; i < tdinfo->nskb_dma; i++) {
+               size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
 
-               for (i = 0; i < tdinfo->nskb_dma; i++) {
-                       size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+               /* For scatter-gather */
+               if (skb_shinfo(skb)->nr_frags > 0)
+                       pktlen = max_t(size_t, pktlen,
+                                      td->td_buf[i].size & ~TD_QUEUE);
 
-                       /* For scatter-gather */
-                       if (skb_shinfo(skb)->nr_frags > 0)
-                               pktlen = max_t(size_t, pktlen,
-                                               td->td_buf[i].size & ~TD_QUEUE);
-
-                       dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
-                                       le16_to_cpu(pktlen), DMA_TO_DEVICE);
-               }
+               dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
+                                le16_to_cpu(pktlen), DMA_TO_DEVICE);
        }
        dev_kfree_skb_irq(skb);
        tdinfo->skb = NULL;
index 4f5c024..6d68c8a 100644
@@ -5,7 +5,7 @@
 config NET_VENDOR_XILINX
        bool "Xilinx devices"
        default y
-       depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ
+       depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,7 +18,7 @@ if NET_VENDOR_XILINX
 
 config XILINX_EMACLITE
        tristate "Xilinx 10/100 Ethernet Lite support"
-       depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ)
+       depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
        select PHYLIB
        ---help---
          This driver supports the 10/100 Ethernet Lite from Xilinx.
index 36ee7ab..69e2a83 100644
@@ -1297,7 +1297,7 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev,
        return 0;
 }
 
-static struct ethtool_ops axienet_ethtool_ops = {
+static const struct ethtool_ops axienet_ethtool_ops = {
        .get_drvinfo    = axienet_ethtools_get_drvinfo,
        .get_regs_len   = axienet_ethtools_get_regs_len,
        .get_regs       = axienet_ethtools_get_regs,
index 3cee84a..93dc10b 100644
@@ -1131,11 +1131,13 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
        lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
        mac_address = of_get_mac_address(ofdev->dev.of_node);
 
-       if (mac_address)
+       if (mac_address) {
                /* Set the MAC address. */
                memcpy(ndev->dev_addr, mac_address, ETH_ALEN);
-       else
-               dev_warn(dev, "No MAC address found\n");
+       } else {
+               dev_warn(dev, "No MAC address found, using random\n");
+               eth_hw_addr_random(ndev);
+       }
 
        /* Clear the Tx CSR's in case this is a restart */
        __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET);
index d95a50a..622ab3a 100644
@@ -484,7 +484,7 @@ static void bpq_setup(struct net_device *dev)
        dev->flags      = 0;
        dev->features   = NETIF_F_LLTX; /* Allow recursion */
 
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
        dev->header_ops      = &ax25_header_ops;
 #endif
 
index fa7b1e4..284b97b 100644
@@ -84,8 +84,6 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
 #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
 
 #define ITAB_NUM 128
-#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
-extern u8 netvsc_hash_key[];
 
 struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
        struct ndis_obj_header hdr;
@@ -175,7 +173,7 @@ struct rndis_device {
 struct rndis_message;
 struct netvsc_device;
 int netvsc_device_add(struct hv_device *device, void *additional_info);
-int netvsc_device_remove(struct hv_device *device);
+void netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
@@ -634,12 +632,34 @@ struct multi_send_data {
        u32 count; /* counter of batched packets */
 };
 
+struct recv_comp_data {
+       u64 tid; /* transaction id */
+       u32 status;
+};
+
+/* Netvsc Receive Slots Max */
+#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
+
+struct multi_recv_comp {
+       void *buf; /* queued receive completions */
+       u32 first; /* first data entry */
+       u32 next; /* next entry for writing */
+};
+
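
NETVSC_RECVSLOT_MAX bounds the per-channel completion queue by the worst
case of back-to-back minimum-size frames filling the receive buffer.
Assuming the 16 MB receive buffer this driver uses for
NETVSC_RECEIVE_BUFFER_SIZE and the standard 1500-byte ETH_DATA_LEN, that
works out as follows:

    #include <assert.h>

    int main(void)
    {
            int recv_buf_size = 1024 * 1024 * 16; /* assumed buffer size */
            int eth_data_len = 1500;              /* ETH_DATA_LEN */

            /* 16777216 / 1500 + 1 == 11185 slots per channel */
            assert(recv_buf_size / eth_data_len + 1 == 11185);
            return 0;
    }
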
 struct netvsc_stats {
        u64 packets;
        u64 bytes;
        struct u64_stats_sync syncp;
 };
 
+struct netvsc_ethtool_stats {
+       unsigned long tx_scattered;
+       unsigned long tx_no_memory;
+       unsigned long tx_no_space;
+       unsigned long tx_too_big;
+       unsigned long tx_busy;
+};
+
 struct netvsc_reconfig {
        struct list_head list;
        u32 event;
@@ -669,6 +689,7 @@ struct net_device_context {
        /* Ethtool settings */
        u8 duplex;
        u32 speed;
+       struct netvsc_ethtool_stats eth_stats;
 
        /* the device is going away */
        bool start_remove;
@@ -736,6 +757,9 @@ struct netvsc_device {
        u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
        u32 pkt_align; /* alignment bytes, e.g. 8 */
 
+       struct multi_recv_comp mrc[VRSS_CHANNEL_MAX];
+       atomic_t num_outstanding_recvs;
+
        atomic_t open_cnt;
 };
 
index 8078bc2..ff05b9b 100644
@@ -59,7 +59,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
                               VM_PKT_DATA_INBAND, 0);
 }
 
-
 static struct netvsc_device *alloc_net_device(void)
 {
        struct netvsc_device *net_device;
@@ -74,17 +73,26 @@ static struct netvsc_device *alloc_net_device(void)
                return NULL;
        }
 
+       net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX *
+                                        sizeof(struct recv_comp_data));
+
        init_waitqueue_head(&net_device->wait_drain);
        net_device->destroy = false;
        atomic_set(&net_device->open_cnt, 0);
        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+       init_completion(&net_device->channel_init_wait);
 
        return net_device;
 }
 
 static void free_netvsc_device(struct netvsc_device *nvdev)
 {
+       int i;
+
+       for (i = 0; i < VRSS_CHANNEL_MAX; i++)
+               vfree(nvdev->mrc[i].buf);
+
        kfree(nvdev->cb_buffer);
        kfree(nvdev);
 }
@@ -107,20 +115,20 @@ static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
                goto get_in_err;
 
        if (net_device->destroy &&
-               atomic_read(&net_device->num_outstanding_sends) == 0)
+           atomic_read(&net_device->num_outstanding_sends) == 0 &&
+           atomic_read(&net_device->num_outstanding_recvs) == 0)
                net_device = NULL;
 
 get_in_err:
        return net_device;
 }
 
-
-static int netvsc_destroy_buf(struct hv_device *device)
+static void netvsc_destroy_buf(struct hv_device *device)
 {
        struct nvsp_message *revoke_packet;
-       int ret = 0;
        struct net_device *ndev = hv_get_drvdata(device);
        struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+       int ret;
 
        /*
         * If we got a section count, it means we received a
@@ -150,7 +158,7 @@ static int netvsc_destroy_buf(struct hv_device *device)
                if (ret != 0) {
                        netdev_err(ndev, "unable to send "
                                "revoke receive buffer to netvsp\n");
-                       return ret;
+                       return;
                }
        }
 
@@ -165,7 +173,7 @@ static int netvsc_destroy_buf(struct hv_device *device)
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown receive buffer's gpadl\n");
-                       return ret;
+                       return;
                }
                net_device->recv_buf_gpadl_handle = 0;
        }
@@ -209,7 +217,7 @@ static int netvsc_destroy_buf(struct hv_device *device)
                if (ret != 0) {
                        netdev_err(ndev, "unable to send "
                                   "revoke send buffer to netvsp\n");
-                       return ret;
+                       return;
                }
        }
        /* Teardown the gpadl on the vsp end */
@@ -223,7 +231,7 @@ static int netvsc_destroy_buf(struct hv_device *device)
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown send buffer's gpadl\n");
-                       return ret;
+                       return;
                }
                net_device->send_buf_gpadl_handle = 0;
        }
@@ -233,8 +241,6 @@ static int netvsc_destroy_buf(struct hv_device *device)
                net_device->send_buf = NULL;
        }
        kfree(net_device->send_section_map);
-
-       return ret;
 }
 
 static int netvsc_init_buf(struct hv_device *device)
@@ -276,7 +282,6 @@ static int netvsc_init_buf(struct hv_device *device)
                goto cleanup;
        }
 
-
        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
 
@@ -403,7 +408,7 @@ static int netvsc_init_buf(struct hv_device *device)
        /* Section count is simply the size divided by the section size.
         */
        net_device->send_section_cnt =
-               net_device->send_buf_size/net_device->send_section_size;
+               net_device->send_buf_size / net_device->send_section_size;
 
        dev_info(&device->device, "Send section size: %d, Section count:%d\n",
                 net_device->send_section_size, net_device->send_section_cnt);
@@ -412,8 +417,8 @@ static int netvsc_init_buf(struct hv_device *device)
        net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
                                             BITS_PER_LONG);
 
-       net_device->send_section_map =
-               kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+       net_device->send_section_map = kcalloc(net_device->map_words,
+                                              sizeof(ulong), GFP_KERNEL);
        if (net_device->send_section_map == NULL) {
                ret = -ENOMEM;
                goto cleanup;
@@ -428,7 +433,6 @@ exit:
        return ret;
 }
 
-
 /* Negotiate NVSP protocol version */
 static int negotiate_nvsp_ver(struct hv_device *device,
                              struct netvsc_device *net_device,
@@ -489,9 +493,10 @@ static int netvsc_connect_vsp(struct hv_device *device)
        struct netvsc_device *net_device;
        struct nvsp_message *init_packet;
        int ndis_version;
-       u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
+       const u32 ver_list[] = {
+               NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
-       int i, num_ver = 4; /* number of different NVSP versions */
+       int i;
 
        net_device = get_outbound_net_device(device);
        if (!net_device)
@@ -500,7 +505,7 @@ static int netvsc_connect_vsp(struct hv_device *device)
        init_packet = &net_device->channel_init_pkt;
 
        /* Negotiate the latest NVSP protocol supported */
-       for (i = num_ver - 1; i >= 0; i--)
+       for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
                if (negotiate_nvsp_ver(device, net_device, init_packet,
                                       ver_list[i])  == 0) {
                        net_device->nvsp_version = ver_list[i];
@@ -559,7 +564,7 @@ static void netvsc_disconnect_vsp(struct hv_device *device)
 /*
  * netvsc_device_remove - Callback when the root bus device is removed
  */
-int netvsc_device_remove(struct hv_device *device)
+void netvsc_device_remove(struct hv_device *device)
 {
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -581,10 +586,8 @@ int netvsc_device_remove(struct hv_device *device)
        /* Release all resources */
        vfree(net_device->sub_cb_buf);
        free_netvsc_device(net_device);
-       return 0;
 }
 
-
 #define RING_AVAIL_PERCENT_HIWATER 20
 #define RING_AVAIL_PERCENT_LOWATER 10
 
@@ -608,72 +611,79 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
        sync_change_bit(index, net_device->send_section_map);
 }
 
+static void netvsc_send_tx_complete(struct netvsc_device *net_device,
+                                   struct vmbus_channel *incoming_channel,
+                                   struct hv_device *device,
+                                   struct vmpacket_descriptor *packet)
+{
+       struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+       struct net_device *ndev = hv_get_drvdata(device);
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
+       struct vmbus_channel *channel = device->channel;
+       int num_outstanding_sends;
+       u16 q_idx = 0;
+       int queue_sends;
+
+       /* Notify the layer above us */
+       if (likely(skb)) {
+               struct hv_netvsc_packet *nvsc_packet
+                       = (struct hv_netvsc_packet *)skb->cb;
+               u32 send_index = nvsc_packet->send_buf_index;
+
+               if (send_index != NETVSC_INVALID_INDEX)
+                       netvsc_free_send_slot(net_device, send_index);
+               q_idx = nvsc_packet->q_idx;
+               channel = incoming_channel;
+
+               dev_kfree_skb_any(skb);
+       }
+
+       num_outstanding_sends =
+               atomic_dec_return(&net_device->num_outstanding_sends);
+       queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]);
+
+       if (net_device->destroy && num_outstanding_sends == 0)
+               wake_up(&net_device->wait_drain);
+
+       if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+           !net_device_ctx->start_remove &&
+           (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+            queue_sends < 1))
+               netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+}
+
 static void netvsc_send_completion(struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
                                   struct hv_device *device,
                                   struct vmpacket_descriptor *packet)
 {
        struct nvsp_message *nvsp_packet;
-       struct hv_netvsc_packet *nvsc_packet;
        struct net_device *ndev = hv_get_drvdata(device);
-       struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       u32 send_index;
-       struct sk_buff *skb;
 
        nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
-                       (packet->offset8 << 3));
+                                             (packet->offset8 << 3));
 
-       if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
-           (nvsp_packet->hdr.msg_type ==
-            NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
-           (nvsp_packet->hdr.msg_type ==
-            NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
-           (nvsp_packet->hdr.msg_type ==
-            NVSP_MSG5_TYPE_SUBCHANNEL)) {
+       switch (nvsp_packet->hdr.msg_type) {
+       case NVSP_MSG_TYPE_INIT_COMPLETE:
+       case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
+       case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
+       case NVSP_MSG5_TYPE_SUBCHANNEL:
                /* Copy the response back */
                memcpy(&net_device->channel_init_pkt, nvsp_packet,
                       sizeof(struct nvsp_message));
                complete(&net_device->channel_init_wait);
-       } else if (nvsp_packet->hdr.msg_type ==
-                  NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
-               int num_outstanding_sends;
-               u16 q_idx = 0;
-               struct vmbus_channel *channel = device->channel;
-               int queue_sends;
-
-               /* Get the send context */
-               skb = (struct sk_buff *)(unsigned long)packet->trans_id;
-
-               /* Notify the layer above us */
-               if (skb) {
-                       nvsc_packet = (struct hv_netvsc_packet *) skb->cb;
-                       send_index = nvsc_packet->send_buf_index;
-                       if (send_index != NETVSC_INVALID_INDEX)
-                               netvsc_free_send_slot(net_device, send_index);
-                       q_idx = nvsc_packet->q_idx;
-                       channel = incoming_channel;
-                       dev_kfree_skb_any(skb);
-               }
-
-               num_outstanding_sends =
-                       atomic_dec_return(&net_device->num_outstanding_sends);
-               queue_sends = atomic_dec_return(&net_device->
-                                               queue_sends[q_idx]);
+               break;
 
-               if (net_device->destroy && num_outstanding_sends == 0)
-                       wake_up(&net_device->wait_drain);
+       case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
+               netvsc_send_tx_complete(net_device, incoming_channel,
+                                       device, packet);
+               break;
 
-               if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-                   !net_device_ctx->start_remove &&
-                   (hv_ringbuf_avail_percent(&channel->outbound) >
-                    RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
-                               netif_tx_wake_queue(netdev_get_tx_queue(
-                                                   ndev, q_idx));
-       } else {
-               netdev_err(ndev, "Unknown send completion packet type- "
-                          "%d received!!\n", nvsp_packet->hdr.msg_type);
+       default:
+               netdev_err(ndev,
+                          "Unknown send completion type %d received!!\n",
+                          nvsp_packet->hdr.msg_type);
        }
-
 }
 
 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
@@ -863,7 +873,7 @@ int netvsc_send(struct hv_device *device,
                struct sk_buff *skb)
 {
        struct netvsc_device *net_device;
-       int ret = 0, m_ret = 0;
+       int ret = 0;
        struct vmbus_channel *out_channel;
        u16 q_idx = packet->q_idx;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -952,8 +962,8 @@ int netvsc_send(struct hv_device *device,
        }
 
        if (msd_send) {
-               m_ret = netvsc_send_pkt(device, msd_send, net_device,
-                                       NULL, msd_skb);
+               int m_ret = netvsc_send_pkt(device, msd_send, net_device,
+                                           NULL, msd_skb);
 
                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
@@ -972,49 +982,121 @@ send_now:
        return ret;
 }
 
-static void netvsc_send_recv_completion(struct hv_device *device,
-                                       struct vmbus_channel *channel,
-                                       struct netvsc_device *net_device,
-                                       u64 transaction_id, u32 status)
+static int netvsc_send_recv_completion(struct vmbus_channel *channel,
+                                      u64 transaction_id, u32 status)
 {
        struct nvsp_message recvcompMessage;
-       int retries = 0;
        int ret;
-       struct net_device *ndev = hv_get_drvdata(device);
 
        recvcompMessage.hdr.msg_type =
                                NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
 
        recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
 
-retry_send_cmplt:
        /* Send the completion */
        ret = vmbus_sendpacket(channel, &recvcompMessage,
-                              sizeof(struct nvsp_message), transaction_id,
-                              VM_PKT_COMP, 0);
-       if (ret == 0) {
-               /* success */
-               /* no-op */
-       } else if (ret == -EAGAIN) {
-               /* no more room...wait a bit and attempt to retry 3 times */
-               retries++;
-               netdev_err(ndev, "unable to send receive completion pkt"
-                       " (tid %llx)...retrying %d\n", transaction_id, retries);
-
-               if (retries < 4) {
-                       udelay(100);
-                       goto retry_send_cmplt;
-               } else {
-                       netdev_err(ndev, "unable to send receive "
-                               "completion pkt (tid %llx)...give up retrying\n",
-                               transaction_id);
-               }
-       } else {
-               netdev_err(ndev, "unable to send receive "
-                       "completion pkt - %llx\n", transaction_id);
+                              sizeof(struct nvsp_message_header) + sizeof(u32),
+                              transaction_id, VM_PKT_COMP, 0);
+
+       return ret;
+}
+
+static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
+                                       u32 *filled, u32 *avail)
+{
+       u32 first = nvdev->mrc[q_idx].first;
+       u32 next = nvdev->mrc[q_idx].next;
+
+       *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
+                 next - first;
+
+       *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
+}
+
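
first and next implement a classic ring buffer in which one slot is always
left empty, so first == next unambiguously means "empty" rather than
"full". A standalone check of the filled/avail arithmetic with a
hypothetical 8-slot ring:

    #include <assert.h>

    #define SLOT_MAX 8 /* hypothetical stand-in for NETVSC_RECVSLOT_MAX */

    static void count(unsigned int first, unsigned int next,
                      unsigned int *filled, unsigned int *avail)
    {
            *filled = (first > next) ? SLOT_MAX - first + next
                                     : next - first;
            *avail = SLOT_MAX - *filled - 1;
    }

    int main(void)
    {
            unsigned int f, a;

            count(0, 0, &f, &a);
            assert(f == 0 && a == 7); /* empty ring, one slot reserved */

            count(6, 2, &f, &a);
            assert(f == 4 && a == 3); /* wrapped: slots 6,7,0,1 are filled */
            return 0;
    }
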
+/* Read the first filled slot, no change to index */
+static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
+                                                        *nvdev, u16 q_idx)
+{
+       u32 filled, avail;
+
+       if (!nvdev->mrc[q_idx].buf)
+               return NULL;
+
+       count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
+       if (!filled)
+               return NULL;
+
+       return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first *
+              sizeof(struct recv_comp_data);
+}
+
+/* Put the first filled slot back to available pool */
+static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+{
+       int num_recv;
+
+       nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) %
+                                 NETVSC_RECVSLOT_MAX;
+
+       num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+
+       if (nvdev->destroy && num_recv == 0)
+               wake_up(&nvdev->wait_drain);
+}
+
+/* Check and send pending recv completions */
+static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
+                                struct vmbus_channel *channel, u16 q_idx)
+{
+       struct recv_comp_data *rcd;
+       int ret;
+
+       while (true) {
+               rcd = read_recv_comp_slot(nvdev, q_idx);
+               if (!rcd)
+                       break;
+
+               ret = netvsc_send_recv_completion(channel, rcd->tid,
+                                                 rcd->status);
+               if (ret)
+                       break;
+
+               put_recv_comp_slot(nvdev, q_idx);
        }
 }
 
+#define NETVSC_RCD_WATERMARK 80
+
+/* Get next available slot */
+static inline struct recv_comp_data *get_recv_comp_slot(
+       struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
+{
+       u32 filled, avail, next;
+       struct recv_comp_data *rcd;
+
+       if (!nvdev->recv_section)
+               return NULL;
+
+       if (!nvdev->mrc[q_idx].buf)
+               return NULL;
+
+       if (atomic_read(&nvdev->num_outstanding_recvs) >
+           nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
+               netvsc_chk_recv_comp(nvdev, channel, q_idx);
+
+       count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
+       if (!avail)
+               return NULL;
+
+       next = nvdev->mrc[q_idx].next;
+       rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data);
+       nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX;
+
+       atomic_inc(&nvdev->num_outstanding_recvs);
+
+       return rcd;
+}
+
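
The 80% watermark makes the deferred path self-correcting: once outstanding
completions exceed 80% of the host's receive sub-allocations, pending
completions are flushed before a new slot is taken. With a hypothetical
1024 sub-allocations:

    #include <assert.h>

    int main(void)
    {
            int num_sub_allocs = 1024; /* hypothetical recv_section value */
            int watermark = 80;        /* NETVSC_RCD_WATERMARK */

            /* integer division, as in get_recv_comp_slot() */
            assert(num_sub_allocs * watermark / 100 == 819);
            return 0;
    }
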
 static void netvsc_receive(struct netvsc_device *net_device,
                        struct vmbus_channel *channel,
                        struct hv_device *device,
@@ -1029,6 +1111,9 @@ static void netvsc_receive(struct netvsc_device *net_device,
        int count = 0;
        struct net_device *ndev = hv_get_drvdata(device);
        void *data;
+       int ret;
+       struct recv_comp_data *rcd;
+       u16 q_idx = channel->offermsg.offer.sub_channel_index;
 
        /*
         * All inbound packets other than send completion should be xfer page
@@ -1073,13 +1158,29 @@ static void netvsc_receive(struct netvsc_device *net_device,
                /* Pass it to the upper layer */
                status = rndis_filter_receive(device, netvsc_packet, &data,
                                              channel);
+       }
 
+       if (!net_device->mrc[q_idx].buf) {
+               ret = netvsc_send_recv_completion(channel,
+                                                 vmxferpage_packet->d.trans_id,
+                                                 status);
+               if (ret)
+                       netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
+                                  q_idx, vmxferpage_packet->d.trans_id, ret);
+               return;
        }
 
-       netvsc_send_recv_completion(device, channel, net_device,
-                                   vmxferpage_packet->d.trans_id, status);
-}
+       rcd = get_recv_comp_slot(net_device, channel, q_idx);
+
+       if (!rcd) {
+               netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+                          q_idx, vmxferpage_packet->d.trans_id);
+               return;
+       }
 
+       rcd->tid = vmxferpage_packet->d.trans_id;
+       rcd->status = status;
+}
 
 static void netvsc_send_table(struct hv_device *hdev,
                              struct nvsp_message *nvmsg)
@@ -1161,11 +1262,11 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
        }
 }
 
-
 void netvsc_channel_cb(void *context)
 {
        int ret;
        struct vmbus_channel *channel = (struct vmbus_channel *)context;
+       u16 q_idx = channel->offermsg.offer.sub_channel_index;
        struct hv_device *device;
        struct netvsc_device *net_device;
        u32 bytes_recvd;
@@ -1217,8 +1318,6 @@ void netvsc_channel_cb(void *context)
                                                       ndev,
                                                       request_id,
                                                       desc);
-
-
                        } else {
                                /*
                                 * We are done for this pass.
@@ -1245,7 +1344,8 @@ void netvsc_channel_cb(void *context)
 
        if (bufferlen > NETVSC_PACKET_SIZE)
                kfree(buffer);
-       return;
+
+       netvsc_chk_recv_comp(net_device, channel, q_idx);
 }
 
 /*
@@ -1267,9 +1367,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
 
        net_device->ring_size = ring_size;
 
-       /* Initialize the NetVSC channel extension */
-       init_completion(&net_device->channel_init_wait);
-
        set_per_channel_state(device->channel, net_device->cb_buffer);
 
        /* Open the channel */
index eb2c122..2360e70 100644
@@ -40,7 +40,6 @@
 
 #include "hyperv_net.h"
 
-
 #define RING_SIZE_MIN 64
 #define LINKCHANGE_INT (2 * HZ)
 #define NETVSC_HW_FEATURES     (NETIF_F_RXCSUM | \
@@ -358,18 +357,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
        struct rndis_message *rndis_msg;
        struct rndis_packet *rndis_pkt;
        u32 rndis_msg_size;
-       bool isvlan;
-       bool linear = false;
        struct rndis_per_packet_info *ppi;
        struct ndis_tcp_ip_checksum_info *csum_info;
-       struct ndis_tcp_lso_info *lso_info;
        int  hdr_offset;
        u32 net_trans_info;
        u32 hash;
        u32 skb_length;
        struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
        struct hv_page_buffer *pb = page_buf;
-       struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
        /* We will need at most two pages to describe the rndis
         * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -377,22 +372,20 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
         * more pages we try linearizing it.
         */
 
-check_size:
        skb_length = skb->len;
        num_data_pgs = netvsc_get_slots(skb) + 2;
-       if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
-               net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
-                                     num_data_pgs, skb->len);
-               ret = -EFAULT;
-               goto drop;
-       } else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
-               if (skb_linearize(skb)) {
-                       net_alert_ratelimited("failed to linearize skb\n");
-                       ret = -ENOMEM;
+
+       if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
+               ++net_device_ctx->eth_stats.tx_scattered;
+
+               if (skb_linearize(skb))
+                       goto no_memory;
+
+               num_data_pgs = netvsc_get_slots(skb) + 2;
+               if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
+                       ++net_device_ctx->eth_stats.tx_too_big;
                        goto drop;
                }
-               linear = true;
-               goto check_size;
        }
 
        /*
@@ -401,17 +394,14 @@ check_size:
         * structure.
         */
        ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
-       if (ret) {
-               netdev_err(net, "unable to alloc hv_netvsc_packet\n");
-               ret = -ENOMEM;
-               goto drop;
-       }
+       if (ret)
+               goto no_memory;
+
        /* Use the skb control buffer for building up the packet */
        BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
                        FIELD_SIZEOF(struct sk_buff, cb));
        packet = (struct hv_netvsc_packet *)skb->cb;
 
-
        packet->q_idx = skb_get_queue_mapping(skb);
 
        packet->total_data_buflen = skb->len;
@@ -420,8 +410,6 @@ check_size:
 
        memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
 
-       isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;
-
        /* Add the rndis header */
        rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
        rndis_msg->msg_len = packet->total_data_buflen;
@@ -440,7 +428,7 @@ check_size:
                *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
        }
 
-       if (isvlan) {
+       if (skb_vlan_tag_present(skb)) {
                struct ndis_pkt_8021q_info *vlan;
 
                rndis_msg_size += NDIS_VLAN_PPI_SIZE;
@@ -461,8 +449,37 @@ check_size:
         * Setup the sendside checksum offload only if this is not a
         * GSO packet.
         */
-       if (skb_is_gso(skb))
-               goto do_lso;
+       if (skb_is_gso(skb)) {
+               struct ndis_tcp_lso_info *lso_info;
+
+               rndis_msg_size += NDIS_LSO_PPI_SIZE;
+               ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
+                                   TCP_LARGESEND_PKTINFO);
+
+               lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
+                                                       ppi->ppi_offset);
+
+               lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+               if (net_trans_info & (INFO_IPV4 << 16)) {
+                       lso_info->lso_v2_transmit.ip_version =
+                               NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
+                       ip_hdr(skb)->tot_len = 0;
+                       ip_hdr(skb)->check = 0;
+                       tcp_hdr(skb)->check =
+                               ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+                                                  ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+               } else {
+                       lso_info->lso_v2_transmit.ip_version =
+                               NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
+                       ipv6_hdr(skb)->payload_len = 0;
+                       tcp_hdr(skb)->check =
+                               ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+               }
+               lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
+               lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
+               goto do_send;
+       }
 
        if ((skb->ip_summed == CHECKSUM_NONE) ||
            (skb->ip_summed == CHECKSUM_UNNECESSARY))
@@ -495,7 +512,7 @@ check_size:
 
                ret = skb_cow_head(skb, 0);
                if (ret)
-                       goto drop;
+                       goto no_memory;
 
                uh = udp_hdr(skb);
                udp_len = ntohs(uh->len);
@@ -509,35 +526,6 @@ check_size:
 
                csum_info->transmit.udp_checksum = 0;
        }
-       goto do_send;
-
-do_lso:
-       rndis_msg_size += NDIS_LSO_PPI_SIZE;
-       ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
-                           TCP_LARGESEND_PKTINFO);
-
-       lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
-                       ppi->ppi_offset);
-
-       lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
-       if (net_trans_info & (INFO_IPV4 << 16)) {
-               lso_info->lso_v2_transmit.ip_version =
-                       NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
-               ip_hdr(skb)->tot_len = 0;
-               ip_hdr(skb)->check = 0;
-               tcp_hdr(skb)->check =
-               ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
-                                  ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
-       } else {
-               lso_info->lso_v2_transmit.ip_version =
-                       NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
-               ipv6_hdr(skb)->payload_len = 0;
-               tcp_hdr(skb)->check =
-               ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-                               &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
-       }
-       lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
-       lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
 
 do_send:
        /* Start filling in the page buffers with the rndis hdr */
@@ -550,21 +538,33 @@ do_send:
        skb_tx_timestamp(skb);
        ret = netvsc_send(net_device_ctx->device_ctx, packet,
                          rndis_msg, &pb, skb);
+       if (likely(ret == 0)) {
+               struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
-drop:
-       if (ret == 0) {
                u64_stats_update_begin(&tx_stats->syncp);
                tx_stats->packets++;
                tx_stats->bytes += skb_length;
                u64_stats_update_end(&tx_stats->syncp);
-       } else {
-               if (ret != -EAGAIN) {
-                       dev_kfree_skb_any(skb);
-                       net->stats.tx_dropped++;
-               }
+               return NETDEV_TX_OK;
+       }
+
+       if (ret == -EAGAIN) {
+               ++net_device_ctx->eth_stats.tx_busy;
+               return NETDEV_TX_BUSY;
        }
 
-       return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
+       if (ret == -ENOSPC)
+               ++net_device_ctx->eth_stats.tx_no_space;
+
+drop:
+       dev_kfree_skb_any(skb);
+       net->stats.tx_dropped++;
+
+       return NETDEV_TX_OK;
+
+no_memory:
+       ++net_device_ctx->eth_stats.tx_no_memory;
+       goto drop;
 }
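
The rewritten tail gives each outcome one exit: success updates the per-CPU
stats and returns NETDEV_TX_OK, -EAGAIN returns NETDEV_TX_BUSY so the stack
requeues the skb, and any other error drops the packet while bumping the
matching ethtool counter. A standalone model of that control flow
(simplified return type; counters hypothetical):

    #include <assert.h>
    #include <errno.h>

    enum tx_ret { TX_OK, TX_BUSY }; /* stands in for NETDEV_TX_* */

    static unsigned long tx_busy, tx_no_space, tx_dropped;

    static enum tx_ret xmit_tail(int send_ret)
    {
            if (send_ret == 0)
                    return TX_OK;     /* stats updated, packet sent */

            if (send_ret == -EAGAIN) {
                    ++tx_busy;
                    return TX_BUSY;   /* stack will retry this skb */
            }

            if (send_ret == -ENOSPC)
                    ++tx_no_space;

            ++tx_dropped;             /* drop: free the skb, count it */
            return TX_OK;
    }

    int main(void)
    {
            assert(xmit_tail(-EAGAIN) == TX_BUSY && tx_busy == 1);
            assert(xmit_tail(-ENOSPC) == TX_OK && tx_no_space == 1);
            assert(tx_dropped == 1);
            return 0;
    }
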
 
 /*
@@ -617,7 +617,6 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
        schedule_delayed_work(&ndev_ctx->dwork, 0);
 }
 
-
 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
                                struct hv_netvsc_packet *packet,
                                struct ndis_tcp_ip_checksum_info *csum_info,
@@ -741,8 +740,12 @@ vf_injection_done:
 static void netvsc_get_drvinfo(struct net_device *net,
                               struct ethtool_drvinfo *info)
 {
+       struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct hv_device *dev = net_device_ctx->device_ctx;
+
        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
        strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+       strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info));
 }
 
 static void netvsc_get_channels(struct net_device *net,
@@ -1018,6 +1021,51 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
        return err;
 }
 
+static const struct {
+       char name[ETH_GSTRING_LEN];
+       u16 offset;
+} netvsc_stats[] = {
+       { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) },
+       { "tx_no_memory",  offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
+       { "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
+       { "tx_too_big",   offsetof(struct netvsc_ethtool_stats, tx_too_big) },
+       { "tx_busy",      offsetof(struct netvsc_ethtool_stats, tx_busy) },
+};
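
One table now drives both netvsc_get_strings() and
netvsc_get_ethtool_stats(): each entry pairs a name with the member's byte
offset inside struct netvsc_ethtool_stats, so adding a counter is a
one-line change. A standalone model of the offsetof()-based lookup
(hypothetical struct and values):

    #include <stddef.h>
    #include <stdio.h>

    struct stats { unsigned long tx_busy, tx_too_big; };

    static const struct { const char *name; size_t offset; } tbl[] = {
            { "tx_busy",    offsetof(struct stats, tx_busy) },
            { "tx_too_big", offsetof(struct stats, tx_too_big) },
    };

    int main(void)
    {
            struct stats s = { .tx_busy = 3, .tx_too_big = 1 };
            size_t i;

            for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
                    printf("%s = %lu\n", tbl[i].name,
                           *(unsigned long *)((char *)&s + tbl[i].offset));
            return 0;
    }
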
+
+static int netvsc_get_sset_count(struct net_device *dev, int string_set)
+{
+       switch (string_set) {
+       case ETH_SS_STATS:
+               return ARRAY_SIZE(netvsc_stats);
+       default:
+               return -EINVAL;
+       }
+}
+
+static void netvsc_get_ethtool_stats(struct net_device *dev,
+                                    struct ethtool_stats *stats, u64 *data)
+{
+       struct net_device_context *ndc = netdev_priv(dev);
+       const void *nds = &ndc->eth_stats;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+               data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
+}
+
+static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+       int i;
+
+       switch (stringset) {
+       case ETH_SS_STATS:
+               for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+                       memcpy(data + i * ETH_GSTRING_LEN,
+                              netvsc_stats[i].name, ETH_GSTRING_LEN);
+               break;
+       }
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void netvsc_poll_controller(struct net_device *net)
 {
@@ -1030,6 +1078,9 @@ static void netvsc_poll_controller(struct net_device *net)
 static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo    = netvsc_get_drvinfo,
        .get_link       = ethtool_op_get_link,
+       .get_ethtool_stats = netvsc_get_ethtool_stats,
+       .get_sset_count = netvsc_get_sset_count,
+       .get_strings    = netvsc_get_strings,
        .get_channels   = netvsc_get_channels,
        .set_channels   = netvsc_set_channels,
        .get_ts_info    = ethtool_op_get_ts_info,
@@ -1167,9 +1218,8 @@ static void netvsc_free_netdev(struct net_device *netdev)
 static struct net_device *get_netvsc_net_device(char *mac)
 {
        struct net_device *dev, *found = NULL;
-       int rtnl_locked;
 
-       rtnl_locked = rtnl_trylock();
+       ASSERT_RTNL();
 
        for_each_netdev(&init_net, dev) {
                if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) {
@@ -1179,8 +1229,6 @@ static struct net_device *get_netvsc_net_device(char *mac)
                        break;
                }
        }
-       if (rtnl_locked)
-               rtnl_unlock();
 
        return found;
 }
@@ -1274,7 +1322,6 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
        return NOTIFY_OK;
 }
 
-
 static int netvsc_vf_down(struct net_device *vf_netdev)
 {
        struct net_device *ndev;
@@ -1308,7 +1355,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
        return NOTIFY_OK;
 }
 
-
 static int netvsc_unregister_vf(struct net_device *vf_netdev)
 {
        struct net_device *ndev;
@@ -1436,7 +1482,6 @@ static int netvsc_remove(struct hv_device *dev)
                return 0;
        }
 
-
        ndev_ctx = netdev_priv(net);
        net_device = ndev_ctx->nvdev;
 
@@ -1483,7 +1528,6 @@ static struct  hv_driver netvsc_drv = {
        .remove = netvsc_remove,
 };
 
-
 /*
  * On Hyper-V, every VF interface is matched with a corresponding
  * synthetic interface. The synthetic interface is presented first
index dd3b335..9195d5d 100644
@@ -663,13 +663,14 @@ cleanup:
        return ret;
 }
 
-u8 netvsc_hash_key[HASH_KEYLEN] = {
+static const u8 netvsc_hash_key[] = {
        0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
        0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
        0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
        0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
        0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
 };
+#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key)
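
Deriving HASH_KEYLEN from the array itself means the length can never drift
from the key data, which the old standalone constant in the header allowed.
A standalone equivalent using the sizeof arithmetic that ARRAY_SIZE expands
to (shortened hypothetical key):

    #include <assert.h>

    static const unsigned char key[] = {
            0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    };
    #define KEYLEN (sizeof(key) / sizeof(key[0]))

    int main(void)
    {
            assert(KEYLEN == 8); /* tracks the initializer automatically */
            return 0;
    }
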
 
 static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
 {
@@ -720,7 +721,6 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
        for (i = 0; i < HASH_KEYLEN; i++)
                keyp[i] = netvsc_hash_key[i];
 
-
        ret = rndis_filter_send_request(rdev, request);
        if (ret != 0)
                goto cleanup;
@@ -738,7 +738,6 @@ cleanup:
        return ret;
 }
 
-
 static int rndis_filter_query_device_link_status(struct rndis_device *dev)
 {
        u32 size = sizeof(u32);
@@ -814,7 +813,6 @@ cleanup:
        return ret;
 }
 
-
 static int rndis_filter_init_device(struct rndis_device *dev)
 {
        struct rndis_request *request;
@@ -897,11 +895,11 @@ cleanup:
 
        /* Wait for all send completions */
        wait_event(nvdev->wait_drain,
-               atomic_read(&nvdev->num_outstanding_sends) == 0);
+                  atomic_read(&nvdev->num_outstanding_sends) == 0 &&
+                  atomic_read(&nvdev->num_outstanding_recvs) == 0);
 
        if (request)
                put_rndis_request(dev, request);
-       return;
 }
 
 static int rndis_filter_open_device(struct rndis_device *dev)
@@ -953,6 +951,9 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
        set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
                              NETVSC_PACKET_SIZE);
 
+       nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX *
+                                             sizeof(struct recv_comp_data));
+
        ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
                         nvscdev->ring_size * PAGE_SIZE, NULL, 0,
                         netvsc_channel_cb, new_sc);
@@ -968,7 +969,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 }
 
 int rndis_filter_device_add(struct hv_device *dev,
-                                 void *additional_info)
+                           void *additional_info)
 {
        int ret;
        struct net_device *net = hv_get_drvdata(dev);
@@ -1050,7 +1051,6 @@ int rndis_filter_device_add(struct hv_device *dev,
        offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
        offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
 
-
        ret = rndis_filter_set_offload_params(net, &offloads);
        if (ret)
                goto err_dev_remv;
@@ -1176,7 +1176,6 @@ void rndis_filter_device_remove(struct hv_device *dev)
        netvsc_device_remove(dev);
 }
 
-
 int rndis_filter_open(struct netvsc_device *nvdev)
 {
        if (!nvdev)
index 351e701..3ea47f2 100644
@@ -2973,6 +2973,7 @@ static void macsec_setup(struct net_device *dev)
        dev->priv_flags |= IFF_NO_QUEUE;
        dev->netdev_ops = &macsec_netdev_ops;
        dev->destructor = macsec_free_netdev;
+       SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
        eth_zero_addr(dev->broadcast);
 }
index d66133b..5078a0d 100644
@@ -15,152 +15,218 @@ if PHYLIB
 config SWPHY
        bool
 
-comment "MII PHY device drivers"
-
-config AQUANTIA_PHY
-        tristate "Drivers for the Aquantia PHYs"
-        ---help---
-          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+comment "MDIO bus device drivers"
 
-config AT803X_PHY
-       tristate "Drivers for Atheros AT803X PHYs"
-       ---help---
-         Currently supports the AT8030 and AT8035 model
+config MDIO_BCM_IPROC
+       tristate "Broadcom iProc MDIO bus controller"
+       depends on ARCH_BCM_IPROC || COMPILE_TEST
+       depends on HAS_IOMEM && OF_MDIO
+       help
+         This module provides a driver for the MDIO busses found in the
+         Broadcom iProc SoC's.
 
-config AMD_PHY
-       tristate "Drivers for the AMD PHYs"
-       ---help---
-         Currently supports the am79c874
+config MDIO_BCM_UNIMAC
+       tristate "Broadcom UniMAC MDIO bus controller"
+       depends on HAS_IOMEM
+       help
+         This module provides a driver for the Broadcom UniMAC MDIO busses.
+         This hardware can be found in the Broadcom GENET Ethernet MAC
+         controllers as well as some Broadcom Ethernet switches such as the
+         Starfighter 2 switches.
 
-config MARVELL_PHY
-       tristate "Drivers for Marvell PHYs"
-       ---help---
-         Currently has a driver for the 88E1011S
-       
-config DAVICOM_PHY
-       tristate "Drivers for Davicom PHYs"
-       ---help---
-         Currently supports dm9161e and dm9131
+config MDIO_BITBANG
+       tristate "Bitbanged MDIO buses"
+       help
+         This module implements the MDIO bus protocol in software,
+         for use by low level drivers that export the ability to
+         drive the relevant pins.
 
-config QSEMI_PHY
-       tristate "Drivers for Quality Semiconductor PHYs"
-       ---help---
-         Currently supports the qs6612
+         If in doubt, say N.
 
-config LXT_PHY
-       tristate "Drivers for the Intel LXT PHYs"
-       ---help---
-         Currently supports the lxt970, lxt971
+config MDIO_BUS_MUX
+       tristate
+       depends on OF_MDIO
+       help
+         This module provides a driver framework for MDIO bus
+         multiplexers which connect one of several child MDIO busses
+         to a parent bus.  Switching between child busses is done by
+         device specific drivers.
 
-config CICADA_PHY
-       tristate "Drivers for the Cicada PHYs"
-       ---help---
-         Currently supports the cis8204
+config MDIO_BUS_MUX_BCM_IPROC
+       tristate "Broadcom iProc based MDIO bus multiplexers"
+       depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
+       select MDIO_BUS_MUX
+       default ARCH_BCM_IPROC
+       help
+         This module provides a driver for MDIO bus multiplexers found in
+         iProc based Broadcom SoCs. This multiplexer connects one of several
+         child MDIO buses to a parent bus. Buses could be internal as well as
+         external and selection logic lies inside the same multiplexer.
 
-config VITESSE_PHY
-        tristate "Drivers for the Vitesse PHYs"
-        ---help---
-          Currently supports the vsc8244
+config MDIO_BUS_MUX_GPIO
+       tristate "GPIO controlled MDIO bus multiplexers"
+       depends on OF_GPIO && OF_MDIO
+       select MDIO_BUS_MUX
+       help
+         This module provides a driver for MDIO bus multiplexers that
+         are controlled via GPIO lines.  The multiplexer connects one of
+         several child MDIO busses to a parent bus.  Child bus
+         selection is under the control of GPIO lines.
 
-config TERANETICS_PHY
-        tristate "Drivers for the Teranetics PHYs"
-        ---help---
-          Currently supports the Teranetics TN2020
+config MDIO_BUS_MUX_MMIOREG
+       tristate "MMIO device-controlled MDIO bus multiplexers"
+       depends on OF_MDIO && HAS_IOMEM
+       select MDIO_BUS_MUX
+       help
+         This module provides a driver for MDIO bus multiplexers that
+         are controlled via a simple memory-mapped device, like an FPGA.
+         The multiplexer connects one of several child MDIO busses to a
+         parent bus.  Child bus selection is under the control of one of
+         the FPGA's registers.
 
-config SMSC_PHY
-       tristate "Drivers for SMSC PHYs"
-       ---help---
-         Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
+         Currently, only 8-bit registers are supported.
 
-config BCM_NET_PHYLIB
+config MDIO_CAVIUM
        tristate
 
-config BROADCOM_PHY
-       tristate "Drivers for Broadcom PHYs"
-       select BCM_NET_PHYLIB
+config MDIO_GPIO
+       tristate "GPIO lib-based bitbanged MDIO buses"
+       depends on MDIO_BITBANG && GPIOLIB
        ---help---
-         Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
-         BCM5481 and BCM5482 PHYs.
+         Supports GPIO lib-based MDIO busses.
 
-config BCM_CYGNUS_PHY
-       tristate "Drivers for Broadcom Cygnus SoC internal PHY"
-       depends on ARCH_BCM_CYGNUS || COMPILE_TEST
-       depends on MDIO_BCM_IPROC
-       select BCM_NET_PHYLIB
+         To compile this driver as a module, choose M here: the module
+         will be called mdio-gpio.
+
+config MDIO_HISI_FEMAC
+       tristate "Hisilicon FEMAC MDIO bus controller"
+       depends on HAS_IOMEM && OF_MDIO
+       help
+         This module provides a driver for the MDIO busses found in
+         Hisilicon SoCs that have a Fast Ethernet MAC.
+
+config MDIO_MOXART
+        tristate "MOXA ART MDIO interface support"
+        depends on ARCH_MOXART
+        help
+          This driver supports the MDIO interface found in the network
+          interface units of the MOXA ART SoC.
+
+config MDIO_OCTEON
+       tristate "Octeon and some ThunderX SOCs MDIO buses"
+       depends on 64BIT
+       depends on HAS_IOMEM
+       select MDIO_CAVIUM
+       help
+         This module provides a driver for the Octeon and ThunderX MDIO
+         buses. It is required by the Octeon and ThunderX ethernet device
+         drivers on some systems.
+
+config MDIO_SUN4I
+       tristate "Allwinner sun4i MDIO interface support"
+       depends on ARCH_SUNXI
+       help
+         This driver supports the MDIO interface found in the network
+         interface units of Allwinner SoCs that have an EMAC (A10,
+         A12, A10s, etc.).
+
+config MDIO_THUNDER
+       tristate "ThunderX SOCs MDIO buses"
+       depends on 64BIT
+       depends on PCI
+       select MDIO_CAVIUM
+       help
+         This driver supports the MDIO interfaces found on Cavium
+         ThunderX SoCs when the MDIO bus device appears as a PCI
+         device.
+
+config MDIO_XGENE
+       tristate "APM X-Gene SoC MDIO bus controller"
+       help
+         This module provides a driver for the MDIO busses found in
+         APM X-Gene SoCs.
+
+comment "MII PHY device drivers"
+
+config AMD_PHY
+       tristate "AMD PHYs"
        ---help---
-         This PHY driver is for the 1G internal PHYs of the Broadcom
-         Cygnus Family SoC.
+         Currently supports the am79c874
 
-         Currently supports internal PHY's used in the BCM11300,
-         BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
-         BCM58303 & BCM58305 Broadcom Cygnus SoCs.
+config AQUANTIA_PHY
+        tristate "Aquantia PHYs"
+        ---help---
+          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+
+config AT803X_PHY
+       tristate "AT803X PHYs"
+       ---help---
+         Currently supports the AT8030 and AT8035 model
 
 config BCM63XX_PHY
-       tristate "Drivers for Broadcom 63xx SOCs internal PHY"
+       tristate "Broadcom 63xx SOCs internal PHY"
        depends on BCM63XX
        select BCM_NET_PHYLIB
        ---help---
          Currently supports the 6348 and 6358 PHYs.
 
 config BCM7XXX_PHY
-       tristate "Drivers for Broadcom 7xxx SOCs internal PHYs"
+       tristate "Broadcom 7xxx SOCs internal PHYs"
        select BCM_NET_PHYLIB
        ---help---
          Currently supports the BCM7366, BCM7439, BCM7445, and
          40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
 
 config BCM87XX_PHY
-       tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs"
+       tristate "Broadcom BCM8706 and BCM8727 PHYs"
        help
          Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs.
 
-config ICPLUS_PHY
-       tristate "Drivers for ICPlus PHYs"
+config BCM_CYGNUS_PHY
+       tristate "Broadcom Cygnus SoC internal PHY"
+       depends on ARCH_BCM_CYGNUS || COMPILE_TEST
+       depends on MDIO_BCM_IPROC
+       select BCM_NET_PHYLIB
        ---help---
-         Currently supports the IP175C and IP1001 PHYs.
+         This PHY driver is for the 1G internal PHYs of the Broadcom
+         Cygnus Family SoC.
 
-config REALTEK_PHY
-       tristate "Drivers for Realtek PHYs"
-       ---help---
-         Supports the Realtek 821x PHY.
+         Currently supports internal PHY's used in the BCM11300,
+         BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
+         BCM58303 & BCM58305 Broadcom Cygnus SoCs.
 
-config NATIONAL_PHY
-       tristate "Drivers for National Semiconductor PHYs"
-       ---help---
-         Currently supports the DP83865 PHY.
+config BCM_NET_PHYLIB
+       tristate
 
-config STE10XP
-       tristate "Driver for STMicroelectronics STe10Xp PHYs"
+config BROADCOM_PHY
+       tristate "Broadcom PHYs"
+       select BCM_NET_PHYLIB
        ---help---
-         This is the driver for the STe100p and STe101p PHYs.
+         Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
+         BCM5481 and BCM5482 PHYs.
 
-config LSI_ET1011C_PHY
-       tristate "Driver for LSI ET1011C PHY"
+config CICADA_PHY
+       tristate "Cicada PHYs"
        ---help---
-         Supports the LSI ET1011C PHY.
+         Currently supports the cis8204
 
-config MICREL_PHY
-       tristate "Driver for Micrel PHYs"
+config DAVICOM_PHY
+       tristate "Davicom PHYs"
        ---help---
-         Supports the KSZ9021, VSC8201, KS8001 PHYs.
+         Currently supports dm9161e and dm9131
 
 config DP83848_PHY
-       tristate "Driver for Texas Instruments DP83848 PHY"
+       tristate "Texas Instruments DP83848 PHY"
        ---help---
          Supports the DP83848 PHY.
 
 config DP83867_PHY
-       tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
+       tristate "Texas Instruments DP83867 Gigabit PHY"
        ---help---
          Currently supports the DP83867 PHY.
 
-config MICROCHIP_PHY
-       tristate "Drivers for Microchip PHYs"
-       help
-         Supports the LAN88XX PHYs.
-
 config FIXED_PHY
-       tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
+       tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
        depends on PHYLIB
        select SWPHY
        ---help---
@@ -169,148 +235,83 @@ config FIXED_PHY
 
          Currently tested with mpc866ads and mpc8349e-mitx.
 
-config MDIO_BITBANG
-       tristate "Support for bitbanged MDIO buses"
-       help
-         This module implements the MDIO bus protocol in software,
-         for use by low level drivers that export the ability to
-         drive the relevant pins.
-
-         If in doubt, say N.
-
-config MDIO_GPIO
-       tristate "Support for GPIO lib-based bitbanged MDIO buses"
-       depends on MDIO_BITBANG && GPIOLIB
+config ICPLUS_PHY
+       tristate "ICPlus PHYs"
        ---help---
-         Supports GPIO lib-based MDIO busses.
-
-         To compile this driver as a module, choose M here: the module
-         will be called mdio-gpio.
-
-config MDIO_CAVIUM
-       tristate
-
-config MDIO_OCTEON
-       tristate "Support for MDIO buses on Octeon and some ThunderX SOCs"
-       depends on 64BIT
-       depends on HAS_IOMEM
-       select MDIO_CAVIUM
-       help
-         This module provides a driver for the Octeon and ThunderX MDIO
-         buses. It is required by the Octeon and ThunderX ethernet device
-         drivers on some systems.
-
-config MDIO_THUNDER
-       tristate "Support for MDIO buses on ThunderX SOCs"
-       depends on 64BIT
-       depends on PCI
-       select MDIO_CAVIUM
-       help
-         This driver supports the MDIO interfaces found on Cavium
-         ThunderX SoCs when the MDIO bus device appears as a PCI
-         device.
+         Currently supports the IP175C and IP1001 PHYs.
 
+config INTEL_XWAY_PHY
+       tristate "Intel XWAY PHYs"
+       ---help---
+         Supports the Intel XWAY (formerly Lantiq) 11G and 22E PHYs.
+         These PHYs are marked as standalone chips under the names
+         PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
+         SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
 
-config MDIO_SUN4I
-       tristate "Allwinner sun4i MDIO interface support"
-       depends on ARCH_SUNXI
-       help
-         This driver supports the MDIO interface found in the network
-         interface units of the Allwinner SoC that have an EMAC (A10,
-         A12, A10s, etc.)
+config LSI_ET1011C_PHY
+       tristate "LSI ET1011C PHY"
+       ---help---
+         Supports the LSI ET1011C PHY.
 
-config MDIO_MOXART
-        tristate "MOXA ART MDIO interface support"
-        depends on ARCH_MOXART
-        help
-          This driver supports the MDIO interface found in the network
-          interface units of the MOXA ART SoC
+config LXT_PHY
+       tristate "Intel LXT PHYs"
+       ---help---
+         Currently supports the lxt970, lxt971
 
-config MDIO_BUS_MUX
-       tristate
-       depends on OF_MDIO
-       help
-         This module provides a driver framework for MDIO bus
-         multiplexers which connect one of several child MDIO busses
-         to a parent bus.  Switching between child busses is done by
-         device specific drivers.
+config MARVELL_PHY
+       tristate "Marvell PHYs"
+       ---help---
+         Currently has a driver for the 88E1011S
 
-config MDIO_BUS_MUX_GPIO
-       tristate "Support for GPIO controlled MDIO bus multiplexers"
-       depends on OF_GPIO && OF_MDIO
-       select MDIO_BUS_MUX
-       help
-         This module provides a driver for MDIO bus multiplexers that
-         are controlled via GPIO lines.  The multiplexer connects one of
-         several child MDIO busses to a parent bus.  Child bus
-         selection is under the control of GPIO lines.
+config MICREL_PHY
+       tristate "Micrel PHYs"
+       ---help---
+         Supports the KSZ9021, VSC8201, KS8001 PHYs.
 
-config MDIO_BUS_MUX_MMIOREG
-       tristate "Support for MMIO device-controlled MDIO bus multiplexers"
-       depends on OF_MDIO && HAS_IOMEM
-       select MDIO_BUS_MUX
+config MICROCHIP_PHY
+       tristate "Microchip PHYs"
        help
-         This module provides a driver for MDIO bus multiplexers that
-         are controlled via a simple memory-mapped device, like an FPGA.
-         The multiplexer connects one of several child MDIO busses to a
-         parent bus.  Child bus selection is under the control of one of
-         the FPGA's registers.
+         Supports the LAN88XX PHYs.
 
-         Currently, only 8-bit registers are supported.
+config MICROSEMI_PHY
+       tristate "Microsemi PHYs"
+       ---help---
+         Currently supports the VSC8531 and VSC8541 PHYs
 
-config MDIO_BUS_MUX_BCM_IPROC
-       tristate "Support for iProc based MDIO bus multiplexers"
-       depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
-       select MDIO_BUS_MUX
-       default ARCH_BCM_IPROC
-       help
-         This module provides a driver for MDIO bus multiplexers found in
-         iProc based Broadcom SoCs. This multiplexer connects one of several
-         child MDIO bus to a parent bus. Buses could be internal as well as
-         external and selection logic lies inside the same multiplexer.
+config NATIONAL_PHY
+       tristate "National Semiconductor PHYs"
+       ---help---
+         Currently supports the DP83865 PHY.
 
-config MDIO_BCM_UNIMAC
-       tristate "Broadcom UniMAC MDIO bus controller"
-       depends on HAS_IOMEM
-       help
-         This module provides a driver for the Broadcom UniMAC MDIO busses.
-         This hardware can be found in the Broadcom GENET Ethernet MAC
-         controllers as well as some Broadcom Ethernet switches such as the
-         Starfighter 2 switches.
+config QSEMI_PHY
+       tristate "Quality Semiconductor PHYs"
+       ---help---
+         Currently supports the qs6612
 
-config MDIO_BCM_IPROC
-       tristate "Broadcom iProc MDIO bus controller"
-       depends on ARCH_BCM_IPROC || COMPILE_TEST
-       depends on HAS_IOMEM && OF_MDIO
-       help
-         This module provides a driver for the MDIO busses found in the
-         Broadcom iProc SoC's.
+config REALTEK_PHY
+       tristate "Realtek PHYs"
+       ---help---
+         Supports the Realtek 821x PHY.
 
-config INTEL_XWAY_PHY
-       tristate "Driver for Intel XWAY PHYs"
+config SMSC_PHY
+       tristate "SMSC PHYs"
        ---help---
-         Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs.
-         These PHYs are marked as standalone chips under the names
-         PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
-         SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
+         Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
 
-config MDIO_HISI_FEMAC
-       tristate "Hisilicon FEMAC MDIO bus controller"
-       depends on HAS_IOMEM && OF_MDIO
-       help
-         This module provides a driver for the MDIO busses found in the
-         Hisilicon SoC that have an Fast Ethernet MAC.
+config STE10XP
+       tristate "STMicroelectronics STe10Xp PHYs"
+       ---help---
+         This is the driver for the STe100p and STe101p PHYs.
 
-config MDIO_XGENE
-       tristate "APM X-Gene SoC MDIO bus controller"
-       help
-         This module provides a driver for the MDIO busses found in the
-         APM X-Gene SoC's.
+config TERANETICS_PHY
+        tristate "Teranetics PHYs"
+        ---help---
+          Currently supports the Teranetics TN2020
 
-config MICROSEMI_PHY
-    tristate "Drivers for the Microsemi PHYs"
-    ---help---
-      Currently supports the VSC8531 and VSC8541 PHYs
+config VITESSE_PHY
+        tristate "Vitesse PHYs"
+        ---help---
+          Currently supports the vsc8244
 
 config XILINX_GMII2RGMII
        tristate "Xilinx GMII2RGMII converter driver"
@@ -319,6 +320,13 @@ config XILINX_GMII2RGMII
          the Reduced Gigabit Media Independent Interface(RGMII) between
          Ethernet physical media devices and the Gigabit Ethernet controller.
 
+config MDIO_XGENE
+       tristate "APM X-Gene SoC MDIO bus controller"
+       depends on ARCH_XGENE || COMPILE_TEST
+       help
+         This module provides a driver for the MDIO busses found in the
+         APM X-Gene SoCs.
+
 endif # PHYLIB
 
 config MICREL_KS8995MA
index 73d65ce..e58667d 100644 (file)
@@ -1,53 +1,55 @@
-# Makefile for Linux PHY drivers
+# Makefile for Linux PHY drivers and MDIO bus drivers
 
 libphy-y                       := phy.o phy_device.o mdio_bus.o mdio_device.o
 libphy-$(CONFIG_SWPHY)         += swphy.o
 
 obj-$(CONFIG_PHYLIB)           += libphy.o
+
+obj-$(CONFIG_MDIO_BCM_IPROC)   += mdio-bcm-iproc.o
+obj-$(CONFIG_MDIO_BCM_UNIMAC)  += mdio-bcm-unimac.o
+obj-$(CONFIG_MDIO_BITBANG)     += mdio-bitbang.o
+obj-$(CONFIG_MDIO_BUS_MUX)     += mdio-mux.o
+obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)   += mdio-mux-bcm-iproc.o
+obj-$(CONFIG_MDIO_BUS_MUX_GPIO)        += mdio-mux-gpio.o
+obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
+obj-$(CONFIG_MDIO_CAVIUM)      += mdio-cavium.o
+obj-$(CONFIG_MDIO_GPIO)                += mdio-gpio.o
+obj-$(CONFIG_MDIO_HISI_FEMAC)  += mdio-hisi-femac.o
+obj-$(CONFIG_MDIO_MOXART)      += mdio-moxart.o
+obj-$(CONFIG_MDIO_OCTEON)      += mdio-octeon.o
+obj-$(CONFIG_MDIO_SUN4I)       += mdio-sun4i.o
+obj-$(CONFIG_MDIO_THUNDER)     += mdio-thunder.o
+obj-$(CONFIG_MDIO_XGENE)       += mdio-xgene.o
+
+obj-$(CONFIG_AMD_PHY)          += amd.o
 obj-$(CONFIG_AQUANTIA_PHY)     += aquantia.o
-obj-$(CONFIG_MARVELL_PHY)      += marvell.o
-obj-$(CONFIG_DAVICOM_PHY)      += davicom.o
-obj-$(CONFIG_CICADA_PHY)       += cicada.o
-obj-$(CONFIG_LXT_PHY)          += lxt.o
-obj-$(CONFIG_QSEMI_PHY)                += qsemi.o
-obj-$(CONFIG_SMSC_PHY)         += smsc.o
-obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
-obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
-obj-$(CONFIG_VITESSE_PHY)      += vitesse.o
-obj-$(CONFIG_BCM_NET_PHYLIB)   += bcm-phy-lib.o
-obj-$(CONFIG_BROADCOM_PHY)     += broadcom.o
+obj-$(CONFIG_AT803X_PHY)       += at803x.o
 obj-$(CONFIG_BCM63XX_PHY)      += bcm63xx.o
 obj-$(CONFIG_BCM7XXX_PHY)      += bcm7xxx.o
 obj-$(CONFIG_BCM87XX_PHY)      += bcm87xx.o
 obj-$(CONFIG_BCM_CYGNUS_PHY)   += bcm-cygnus.o
-obj-$(CONFIG_ICPLUS_PHY)       += icplus.o
-obj-$(CONFIG_REALTEK_PHY)      += realtek.o
-obj-$(CONFIG_LSI_ET1011C_PHY)  += et1011c.o
-obj-$(CONFIG_FIXED_PHY)                += fixed_phy.o
-obj-$(CONFIG_MDIO_BITBANG)     += mdio-bitbang.o
-obj-$(CONFIG_MDIO_GPIO)                += mdio-gpio.o
-obj-$(CONFIG_NATIONAL_PHY)     += national.o
+obj-$(CONFIG_BCM_NET_PHYLIB)   += bcm-phy-lib.o
+obj-$(CONFIG_BROADCOM_PHY)     += broadcom.o
+obj-$(CONFIG_CICADA_PHY)       += cicada.o
+obj-$(CONFIG_DAVICOM_PHY)      += davicom.o
 obj-$(CONFIG_DP83640_PHY)      += dp83640.o
 obj-$(CONFIG_DP83848_PHY)      += dp83848.o
 obj-$(CONFIG_DP83867_PHY)      += dp83867.o
-obj-$(CONFIG_STE10XP)          += ste10Xp.o
-obj-$(CONFIG_MICREL_PHY)       += micrel.o
-obj-$(CONFIG_MDIO_OCTEON)      += mdio-octeon.o
-obj-$(CONFIG_MDIO_THUNDER)     += mdio-thunder.o
-obj-$(CONFIG_MDIO_CAVIUM)      += mdio-cavium.o
+obj-$(CONFIG_FIXED_PHY)                += fixed_phy.o
+obj-$(CONFIG_ICPLUS_PHY)       += icplus.o
+obj-$(CONFIG_INTEL_XWAY_PHY)   += intel-xway.o
+obj-$(CONFIG_LSI_ET1011C_PHY)  += et1011c.o
+obj-$(CONFIG_LXT_PHY)          += lxt.o
+obj-$(CONFIG_MARVELL_PHY)      += marvell.o
 obj-$(CONFIG_MICREL_KS8995MA)  += spi_ks8995.o
-obj-$(CONFIG_AT803X_PHY)       += at803x.o
-obj-$(CONFIG_AMD_PHY)          += amd.o
-obj-$(CONFIG_MDIO_BUS_MUX)     += mdio-mux.o
-obj-$(CONFIG_MDIO_BUS_MUX_GPIO)        += mdio-mux-gpio.o
-obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
-obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)   += mdio-mux-bcm-iproc.o
-obj-$(CONFIG_MDIO_SUN4I)       += mdio-sun4i.o
-obj-$(CONFIG_MDIO_MOXART)      += mdio-moxart.o
-obj-$(CONFIG_MDIO_BCM_UNIMAC)  += mdio-bcm-unimac.o
+obj-$(CONFIG_MICREL_PHY)       += micrel.o
 obj-$(CONFIG_MICROCHIP_PHY)    += microchip.o
-obj-$(CONFIG_MDIO_BCM_IPROC)   += mdio-bcm-iproc.o
-obj-$(CONFIG_INTEL_XWAY_PHY)   += intel-xway.o
-obj-$(CONFIG_MDIO_HISI_FEMAC)  += mdio-hisi-femac.o
-obj-$(CONFIG_MDIO_XGENE)       += mdio-xgene.o
+obj-$(CONFIG_MICROSEMI_PHY)    += mscc.o
+obj-$(CONFIG_NATIONAL_PHY)     += national.o
+obj-$(CONFIG_QSEMI_PHY)                += qsemi.o
+obj-$(CONFIG_REALTEK_PHY)      += realtek.o
+obj-$(CONFIG_SMSC_PHY)         += smsc.o
+obj-$(CONFIG_STE10XP)          += ste10Xp.o
+obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
+obj-$(CONFIG_VITESSE_PHY)      += vitesse.o
 obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
index 053e879..885ac9c 100644 (file)
@@ -964,7 +964,7 @@ static struct phy_driver ksphy_driver[] = {
        .get_strings    = kszphy_get_strings,
        .get_stats      = kszphy_get_stats,
        .suspend        = genphy_suspend,
-       .resume         = genphy_resume,
+       .resume         = kszphy_resume,
 }, {
        .phy_id         = PHY_ID_KSZ8873MLL,
        .phy_id_mask    = MICREL_PHY_ID_MASK,
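
The hunk above swaps the generic genphy_resume for a chip-specific kszphy_resume in one ksphy_driver[] entry, which suggests the device loses driver-applied state across a suspend cycle. A minimal sketch of such a resume hook, assuming the only requirement is to re-run the driver's setup after the generic helper clears the power-down bit (all example_* names are hypothetical):

#include <linux/phy.h>

static int example_phy_config_init(struct phy_device *phydev);	/* hypothetical */

static int example_phy_resume(struct phy_device *phydev)
{
	int ret;

	/* clear BMCR_PDOWN via the generic helper first */
	ret = genphy_resume(phydev);
	if (ret)
		return ret;

	/* then restore whatever setup the chip dropped while powered down */
	return example_phy_config_init(phydev);
}
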
index ad33390..c09cc4a 100644 (file)
 #include <linux/phy.h>
 
 enum rgmii_rx_clock_delay {
-       RGMII_RX_CLK_DELAY_0_2_NS = 0,
-       RGMII_RX_CLK_DELAY_0_8_NS = 1,
-       RGMII_RX_CLK_DELAY_1_1_NS = 2,
-       RGMII_RX_CLK_DELAY_1_7_NS = 3,
-       RGMII_RX_CLK_DELAY_2_0_NS = 4,
-       RGMII_RX_CLK_DELAY_2_3_NS = 5,
-       RGMII_RX_CLK_DELAY_2_6_NS = 6,
-       RGMII_RX_CLK_DELAY_3_4_NS = 7
+       RGMII_RX_CLK_DELAY_0_2_NS = 0,
+       RGMII_RX_CLK_DELAY_0_8_NS = 1,
+       RGMII_RX_CLK_DELAY_1_1_NS = 2,
+       RGMII_RX_CLK_DELAY_1_7_NS = 3,
+       RGMII_RX_CLK_DELAY_2_0_NS = 4,
+       RGMII_RX_CLK_DELAY_2_3_NS = 5,
+       RGMII_RX_CLK_DELAY_2_6_NS = 6,
+       RGMII_RX_CLK_DELAY_3_4_NS = 7
 };
 
-#define MII_VSC85XX_INT_MASK              25
-#define MII_VSC85XX_INT_MASK_MASK         0xa000
-#define MII_VSC85XX_INT_STATUS            26
+#define MII_VSC85XX_INT_MASK             25
+#define MII_VSC85XX_INT_MASK_MASK        0xa000
+#define MII_VSC85XX_INT_STATUS           26
 
-#define MSCC_EXT_PAGE_ACCESS              31
-#define MSCC_PHY_PAGE_STANDARD            0x0000 /* Standard registers */
-#define MSCC_PHY_PAGE_EXTENDED_2          0x0002 /* Extended reg - page 2 */
+#define MSCC_EXT_PAGE_ACCESS             31
+#define MSCC_PHY_PAGE_STANDARD           0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED_2         0x0002 /* Extended reg - page 2 */
 
 /* Extended Page 2 Registers */
-#define MSCC_PHY_RGMII_CNTL                       20
-#define RGMII_RX_CLK_DELAY_MASK                   0x0070
-#define RGMII_RX_CLK_DELAY_POS            4
+#define MSCC_PHY_RGMII_CNTL              20
+#define RGMII_RX_CLK_DELAY_MASK                  0x0070
+#define RGMII_RX_CLK_DELAY_POS           4
 
 /* Microsemi PHY ID's */
-#define PHY_ID_VSC8531                            0x00070570
-#define PHY_ID_VSC8541                            0x00070770
+#define PHY_ID_VSC8531                   0x00070570
+#define PHY_ID_VSC8541                   0x00070770
 
 static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
 {
-       int rc;
+       int rc;
 
-       rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
-       return rc;
+       rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
+       return rc;
 }
 
 static int vsc85xx_default_config(struct phy_device *phydev)
 {
-       int rc;
-       u16 reg_val;
+       int rc;
+       u16 reg_val;
 
-       mutex_lock(&phydev->lock);
-       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
-       if (rc != 0)
-               goto out_unlock;
+       mutex_lock(&phydev->lock);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+       if (rc != 0)
+               goto out_unlock;
 
-       reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
-       reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
-       reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
-       phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
-       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+       reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
+       reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
+       reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
+       phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
 
 out_unlock:
-       mutex_unlock(&phydev->lock);
+       mutex_unlock(&phydev->lock);
 
-       return rc;
+       return rc;
 }
 
 static int vsc85xx_config_init(struct phy_device *phydev)
 {
-       int rc;
+       int rc;
 
-       rc = vsc85xx_default_config(phydev);
-       if (rc)
-               return rc;
-       rc = genphy_config_init(phydev);
+       rc = vsc85xx_default_config(phydev);
+       if (rc)
+               return rc;
+       rc = genphy_config_init(phydev);
 
-       return rc;
+       return rc;
 }
 
 static int vsc85xx_ack_interrupt(struct phy_device *phydev)
 {
-       int rc = 0;
+       int rc = 0;
 
-       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
-               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
 
-       return (rc < 0) ? rc : 0;
+       return (rc < 0) ? rc : 0;
 }
 
 static int vsc85xx_config_intr(struct phy_device *phydev)
 {
-       int rc;
-
-       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
-               rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
-                                  MII_VSC85XX_INT_MASK_MASK);
-       } else {
-               rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
-               if (rc < 0)
-                       return rc;
-               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
-       }
-
-       return rc;
+       int rc;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
+                              MII_VSC85XX_INT_MASK_MASK);
+       } else {
+               rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
+               if (rc < 0)
+                       return rc;
+               rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+       }
+
+       return rc;
 }
 
 /* Microsemi VSC85xx PHYs */
 static struct phy_driver vsc85xx_driver[] = {
 {
-       .phy_id                 = PHY_ID_VSC8531,
-       .name                   = "Microsemi VSC8531",
-       .phy_id_mask    = 0xfffffff0,
-       .features               = PHY_GBIT_FEATURES,
-       .flags                  = PHY_HAS_INTERRUPT,
-       .soft_reset             = &genphy_soft_reset,
-       .config_init    = &vsc85xx_config_init,
-       .config_aneg    = &genphy_config_aneg,
-       .aneg_done              = &genphy_aneg_done,
-       .read_status    = &genphy_read_status,
-       .ack_interrupt  = &vsc85xx_ack_interrupt,
-       .config_intr    = &vsc85xx_config_intr,
-       .suspend                = &genphy_suspend,
-       .resume                 = &genphy_resume,
+       .phy_id         = PHY_ID_VSC8531,
+       .name           = "Microsemi VSC8531",
+       .phy_id_mask    = 0xfffffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .soft_reset     = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done      = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend        = &genphy_suspend,
+       .resume         = &genphy_resume,
 },
 {
-       .phy_id                 = PHY_ID_VSC8541,
-       .name                   = "Microsemi VSC8541 SyncE",
-       .phy_id_mask    = 0xfffffff0,
-       .features               = PHY_GBIT_FEATURES,
-       .flags                  = PHY_HAS_INTERRUPT,
-       .soft_reset             = &genphy_soft_reset,
-       .config_init    = &vsc85xx_config_init,
-       .config_aneg    = &genphy_config_aneg,
-       .aneg_done              = &genphy_aneg_done,
-       .read_status    = &genphy_read_status,
-       .ack_interrupt  = &vsc85xx_ack_interrupt,
-       .config_intr    = &vsc85xx_config_intr,
-       .suspend                = &genphy_suspend,
-       .resume                 = &genphy_resume,
+       .phy_id         = PHY_ID_VSC8541,
+       .name           = "Microsemi VSC8541 SyncE",
+       .phy_id_mask    = 0xfffffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .soft_reset     = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &genphy_config_aneg,
+       .aneg_done      = &genphy_aneg_done,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend        = &genphy_suspend,
+       .resume         = &genphy_resume,
 }
 
 };
@@ -149,9 +149,9 @@ static struct phy_driver vsc85xx_driver[] = {
 module_phy_driver(vsc85xx_driver);
 
 static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
-       { PHY_ID_VSC8531, 0xfffffff0, },
-       { PHY_ID_VSC8541, 0xfffffff0, },
-       { }
+       { PHY_ID_VSC8531, 0xfffffff0, },
+       { PHY_ID_VSC8541, 0xfffffff0, },
+       { }
 };
 
 MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
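
The vsc85xx functions above follow the usual paged-register pattern: select the extended page, read-modify-write the register, switch back to the standard page, all under phydev->lock. A condensed sketch of that pattern, using the constants defined in the hunk above and adding the phy_read()/phy_write() error checks the original elides (example_* is hypothetical):

#include <linux/phy.h>

static int example_set_rgmii_rx_delay(struct phy_device *phydev, u16 delay_sel)
{
	int rc, reg;

	mutex_lock(&phydev->lock);
	rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED_2);
	if (rc)
		goto out;

	reg = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
	if (reg < 0) {
		rc = reg;
		goto out;
	}
	reg &= ~RGMII_RX_CLK_DELAY_MASK;
	reg |= delay_sel << RGMII_RX_CLK_DELAY_POS;
	rc = phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg);

out:
	/* always restore the standard page before releasing the lock */
	phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
	mutex_unlock(&phydev->lock);
	return rc;
}
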
index c5dc2c3..c6f6683 100644 (file)
@@ -722,8 +722,10 @@ phy_err:
 int phy_start_interrupts(struct phy_device *phydev)
 {
        atomic_set(&phydev->irq_disable, 0);
-       if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
-                       phydev) < 0) {
+       if (request_irq(phydev->irq, phy_interrupt,
+                               IRQF_SHARED,
+                               "phy_interrupt",
+                               phydev) < 0) {
                pr_warn("%s: Can't get IRQ %d (PHY)\n",
                        phydev->mdio.bus->name, phydev->irq);
                phydev->irq = PHY_POLL;
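
Passing IRQF_SHARED changes the handler's contract: on a shared line every registered handler runs on every interrupt, so each one must determine whether its own device raised it and return IRQ_NONE otherwise, and dev_id must be a unique non-NULL pointer so the handler can be matched at free_irq() time. A minimal sketch of that contract, with example_* names hypothetical:

#include <linux/interrupt.h>
#include <linux/phy.h>

static bool example_irq_is_mine(struct phy_device *phydev);	/* hypothetical check */

static irqreturn_t example_shared_handler(int irq, void *dev_id)
{
	struct phy_device *phydev = dev_id;

	/* let the other handlers sharing this line have their turn */
	if (!example_irq_is_mine(phydev))
		return IRQ_NONE;

	/* ... ack the PHY and kick its state machine ... */
	return IRQ_HANDLED;
}

static int example_start_interrupts(struct phy_device *phydev)
{
	return request_irq(phydev->irq, example_shared_handler, IRQF_SHARED,
			   "phy_interrupt", phydev);
}
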
index cad6e19..d15dd39 100644 (file)
@@ -1,7 +1,9 @@
 /* Xilinx GMII2RGMII Converter driver
  *
  * Copyright (C) 2016 Xilinx, Inc.
+ * Copyright (C) 2016 Andrew Lunn <andrew@lunn.ch>
  *
+ * Author: Andrew Lunn <andrew@lunn.ch>
  * Author: Kedareswara rao Appana <appanad@xilinx.com>
  *
  * Description:
@@ -56,7 +58,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
        return 0;
 }
 
-int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
 {
        struct device *dev = &mdiodev->dev;
        struct device_node *np = dev->of_node, *phy_node;
@@ -73,6 +75,7 @@ int xgmiitorgmii_probe(struct mdio_device *mdiodev)
        }
 
        priv->phy_dev = of_phy_find_device(phy_node);
+       of_node_put(phy_node);
        if (!priv->phy_dev) {
                dev_info(dev, "Couldn't find phydev\n");
                return -EPROBE_DEFER;
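
The added of_node_put() closes a device-tree refcount leak: of_parse_phandle() returns the PHY node with an elevated refcount, and that reference can be dropped as soon as of_phy_find_device() has done its lookup, since the lookup takes its own reference on the underlying device. A sketch of the full get/put discipline, assuming a "phy-handle" property (example_find_phy is hypothetical):

#include <linux/device.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/phy.h>

static struct phy_device *example_find_phy(struct device *dev)
{
	struct device_node *phy_node;
	struct phy_device *phydev;

	phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0);
	if (!phy_node)
		return NULL;

	phydev = of_phy_find_device(phy_node);

	/* of_parse_phandle() returned phy_node with a reference held;
	 * drop it here whether or not the lookup succeeded */
	of_node_put(phy_node);

	/* on success of_phy_find_device() holds a device reference; the
	 * caller releases it later with put_device(&phydev->mdio.dev) */
	return phydev;
}
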
index 70cfa06..5489c0e 100644 (file)
@@ -1363,6 +1363,8 @@ static void ppp_setup(struct net_device *dev)
        dev->netdev_ops = &ppp_netdev_ops;
        SET_NETDEV_DEVTYPE(dev, &ppp_type);
 
+       dev->features |= NETIF_F_LLTX;
+
        dev->hard_header_len = PPP_HDRLEN;
        dev->mtu = PPP_MRU;
        dev->addr_len = 0;
@@ -1376,12 +1378,8 @@ static void ppp_setup(struct net_device *dev)
  * Transmit-side routines.
  */
 
-/*
- * Called to do any work queued up on the transmit side
- * that can now be done.
- */
-static void
-ppp_xmit_process(struct ppp *ppp)
+/* Called to do any work queued up on the transmit side that can now be done */
+static void __ppp_xmit_process(struct ppp *ppp)
 {
        struct sk_buff *skb;
 
@@ -1401,6 +1399,30 @@ ppp_xmit_process(struct ppp *ppp)
        ppp_xmit_unlock(ppp);
 }
 
+static DEFINE_PER_CPU(int, ppp_xmit_recursion);
+
+static void ppp_xmit_process(struct ppp *ppp)
+{
+       local_bh_disable();
+
+       if (unlikely(__this_cpu_read(ppp_xmit_recursion)))
+               goto err;
+
+       __this_cpu_inc(ppp_xmit_recursion);
+       __ppp_xmit_process(ppp);
+       __this_cpu_dec(ppp_xmit_recursion);
+
+       local_bh_enable();
+
+       return;
+
+err:
+       local_bh_enable();
+
+       if (net_ratelimit())
+               netdev_err(ppp->dev, "recursion detected\n");
+}
+
 static inline struct sk_buff *
 pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
 {
@@ -1856,11 +1878,8 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 }
 #endif /* CONFIG_PPP_MULTILINK */
 
-/*
- * Try to send data out on a channel.
- */
-static void
-ppp_channel_push(struct channel *pch)
+/* Try to send data out on a channel */
+static void __ppp_channel_push(struct channel *pch)
 {
        struct sk_buff *skb;
        struct ppp *ppp;
@@ -1885,11 +1904,22 @@ ppp_channel_push(struct channel *pch)
                read_lock_bh(&pch->upl);
                ppp = pch->ppp;
                if (ppp)
-                       ppp_xmit_process(ppp);
+                       __ppp_xmit_process(ppp);
                read_unlock_bh(&pch->upl);
        }
 }
 
+static void ppp_channel_push(struct channel *pch)
+{
+       local_bh_disable();
+
+       __this_cpu_inc(ppp_xmit_recursion);
+       __ppp_channel_push(pch);
+       __this_cpu_dec(ppp_xmit_recursion);
+
+       local_bh_enable();
+}
+
 /*
  * Receive-side routines.
  */
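
The per-CPU ppp_xmit_recursion counter pairs with the NETIF_F_LLTX flag set in ppp_setup() above: the core no longer serializes PPP transmits with its own xmit lock, so the driver itself must catch the case where a channel's output path re-enters the same unit, for instance when a PPP channel is itself carried over a PPP device. The guard only works because bottom halves stay disabled around the increment and decrement, which pins the task to the CPU whose counter it is touching. A condensed sketch of the pattern (example_* names hypothetical):

#include <linux/netdevice.h>
#include <linux/percpu.h>

static void __example_xmit(struct net_device *dev);	/* hypothetical inner xmit */

static DEFINE_PER_CPU(int, example_xmit_recursion);

static void example_xmit(struct net_device *dev)
{
	/* keep BHs off for the whole section so nothing on this CPU can
	 * run in between and observe a half-updated counter */
	local_bh_disable();

	if (unlikely(__this_cpu_read(example_xmit_recursion))) {
		local_bh_enable();
		if (net_ratelimit())
			netdev_err(dev, "recursion detected\n");
		return;
	}

	__this_cpu_inc(example_xmit_recursion);
	__example_xmit(dev);	/* may legitimately call back into example_xmit() */
	__this_cpu_dec(example_xmit_recursion);

	local_bh_enable();
}
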
index cdb19b3..b228bea 100644 (file)
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/if_team.h>
 
+static rx_handler_result_t lb_receive(struct team *team, struct team_port *port,
+                                     struct sk_buff *skb)
+{
+       if (unlikely(skb->protocol == htons(ETH_P_SLOW))) {
+               /* LACPDU packets should go to exact delivery */
+               const unsigned char *dest = eth_hdr(skb)->h_dest;
+
+               if (is_link_local_ether_addr(dest) && dest[5] == 0x02)
+                       return RX_HANDLER_EXACT;
+       }
+       return RX_HANDLER_ANOTHER;
+}
+
 struct lb_priv;
 
 typedef struct team_port *lb_select_tx_port_func_t(struct team *,
@@ -652,6 +666,7 @@ static const struct team_mode_ops lb_mode_ops = {
        .port_enter             = lb_port_enter,
        .port_leave             = lb_port_leave,
        .port_disabled          = lb_port_disabled,
+       .receive                = lb_receive,
        .transmit               = lb_transmit,
 };
 
index 9c8b5bc..8093e39 100644 (file)
@@ -731,14 +731,9 @@ static int update_filter(struct tap_filter *filter, void __user *arg)
        }
 
        alen = ETH_ALEN * uf.count;
-       addr = kmalloc(alen, GFP_KERNEL);
-       if (!addr)
-               return -ENOMEM;
-
-       if (copy_from_user(addr, arg + sizeof(uf), alen)) {
-               err = -EFAULT;
-               goto done;
-       }
+       addr = memdup_user(arg + sizeof(uf), alen);
+       if (IS_ERR(addr))
+               return PTR_ERR(addr);
 
        /* The filter is updated without holding any locks. Which is
         * perfectly safe. We disable it first and in the worst
@@ -758,7 +753,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg)
        for (; n < uf.count; n++) {
                if (!is_multicast_ether_addr(addr[n].u)) {
                        err = 0; /* no filter */
-                       goto done;
+                       goto free_addr;
                }
                addr_hash_set(filter->mask, addr[n].u);
        }
@@ -774,8 +769,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg)
 
        /* Return the number of exact filters */
        err = nexact;
-
-done:
+free_addr:
        kfree(addr);
        return err;
 }
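
memdup_user() is the idiomatic replacement for an open-coded kmalloc() plus copy_from_user(): it allocates, copies, and converts a fault into an ERR_PTR in one step, which is what lets the early return above replace the old done: label with the narrower free_addr:. A minimal sketch of the call pattern (example_load_table is hypothetical):

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

static int example_load_table(const void __user *uarg, size_t len)
{
	u8 *buf = memdup_user(uarg, len);

	if (IS_ERR(buf))
		return PTR_ERR(buf);	/* -ENOMEM or -EFAULT */

	/* ... consume buf ... */

	kfree(buf);
	return 0;
}
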
@@ -894,11 +888,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
                goto drop;
 
-       if (skb->sk && sk_fullsock(skb->sk)) {
-               sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
-                                 &skb_shinfo(skb)->tx_flags);
-               sw_tx_timestamp(skb);
-       }
+       skb_tx_timestamp(skb);
 
        /* Orphan the skb - required as we might hang on to it
         * for indefinite time.
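
The timestamp hunk replaces the open-coded sock_tx_timestamp()/sw_tx_timestamp() pair with skb_tx_timestamp(), which performs the same socket-flag checks internally and is safe to call unconditionally on every transmitted skb. The usual placement in a driver xmit path, as a sketch:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* take the software TX timestamp (only if the socket requested
	 * one) at the last moment the driver still owns the skb */
	skb_tx_timestamp(skb);

	/* ... hand skb to the hardware queue (elided) ... */
	return NETDEV_TX_OK;
}
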
index a2d3ea6..d109242 100644 (file)
@@ -46,6 +46,7 @@
 #define AX_CMD_SET_SW_MII              0x06
 #define AX_CMD_READ_MII_REG            0x07
 #define AX_CMD_WRITE_MII_REG           0x08
+#define AX_CMD_STATMNGSTS_REG          0x09
 #define AX_CMD_SET_HW_MII              0x0a
 #define AX_CMD_READ_EEPROM             0x0b
 #define AX_CMD_WRITE_EEPROM            0x0c
 #define AX_CMD_SW_RESET                        0x20
 #define AX_CMD_SW_PHY_STATUS           0x21
 #define AX_CMD_SW_PHY_SELECT           0x22
+#define AX_QCTCTRL                     0x2A
+
+#define AX_CHIPCODE_MASK               0x70
+#define AX_AX88772_CHIPCODE            0x00
+#define AX_AX88772A_CHIPCODE           0x10
+#define AX_AX88772B_CHIPCODE           0x20
+#define AX_HOST_EN                     0x01
+
+#define AX_PHYSEL_PSEL                 0x01
+#define AX_PHYSEL_SSMII                        0
+#define AX_PHYSEL_SSEN                 0x10
 
 #define AX_PHY_SELECT_MASK             (BIT(3) | BIT(2))
 #define AX_PHY_SELECT_INTERNAL         0
@@ -173,6 +185,10 @@ struct asix_rx_fixup_info {
 };
 
 struct asix_common_private {
+       void (*resume)(struct usbnet *dev);
+       void (*suspend)(struct usbnet *dev);
+       u16 presvd_phy_advertise;
+       u16 presvd_phy_bmcr;
        struct asix_rx_fixup_info rx_fixup_info;
 };
 
@@ -182,10 +198,10 @@ extern const struct driver_info ax88172a_info;
 #define FLAG_EEPROM_MAC                (1UL << 0)  /* init device MAC from eeprom */
 
 int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-                 u16 size, void *data);
+                 u16 size, void *data, int in_pm);
 
 int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-                  u16 size, void *data);
+                  u16 size, void *data, int in_pm);
 
 void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value,
                          u16 index, u16 size, void *data);
@@ -197,27 +213,31 @@ int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb);
 struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
                              gfp_t flags);
 
-int asix_set_sw_mii(struct usbnet *dev);
-int asix_set_hw_mii(struct usbnet *dev);
+int asix_set_sw_mii(struct usbnet *dev, int in_pm);
+int asix_set_hw_mii(struct usbnet *dev, int in_pm);
 
 int asix_read_phy_addr(struct usbnet *dev, int internal);
 int asix_get_phy_addr(struct usbnet *dev);
 
-int asix_sw_reset(struct usbnet *dev, u8 flags);
+int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm);
 
-u16 asix_read_rx_ctl(struct usbnet *dev);
-int asix_write_rx_ctl(struct usbnet *dev, u16 mode);
+u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm);
+int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm);
 
-u16 asix_read_medium_status(struct usbnet *dev);
-int asix_write_medium_mode(struct usbnet *dev, u16 mode);
+u16 asix_read_medium_status(struct usbnet *dev, int in_pm);
+int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm);
 
-int asix_write_gpio(struct usbnet *dev, u16 value, int sleep);
+int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm);
 
 void asix_set_multicast(struct net_device *net);
 
 int asix_mdio_read(struct net_device *netdev, int phy_id, int loc);
 void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val);
 
+int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc);
+void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc,
+                         int val);
+
 void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo);
 int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo);
 
index 7de5ab5..f79eb12 100644 (file)
 #include "asix.h"
 
 int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-                 u16 size, void *data)
+                 u16 size, void *data, int in_pm)
 {
        int ret;
-       ret = usbnet_read_cmd(dev, cmd,
-                              USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-                              value, index, data, size);
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
+
+       BUG_ON(!dev);
+
+       if (!in_pm)
+               fn = usbnet_read_cmd;
+       else
+               fn = usbnet_read_cmd_nopm;
+
+       ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+                value, index, data, size);
+
+       if (unlikely(ret < 0))
+               netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n",
+                           index, ret);
 
-       if (ret != size && ret >= 0)
-               return -EINVAL;
        return ret;
 }
 
 int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-                  u16 size, void *data)
+                  u16 size, void *data, int in_pm)
 {
-       return usbnet_write_cmd(dev, cmd,
-                               USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-                               value, index, data, size);
+       int ret;
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
+
+       BUG_ON(!dev);
+
+       if (!in_pm)
+               fn = usbnet_write_cmd;
+       else
+               fn = usbnet_write_cmd_nopm;
+
+       ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+                value, index, data, size);
+
+       if (unlikely(ret < 0))
+               netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n",
+                           index, ret);
+
+       return ret;
 }
 
 void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
@@ -225,19 +250,20 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
        return skb;
 }
 
-int asix_set_sw_mii(struct usbnet *dev)
+int asix_set_sw_mii(struct usbnet *dev, int in_pm)
 {
        int ret;
-       ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm);
+
        if (ret < 0)
                netdev_err(dev->net, "Failed to enable software MII access\n");
        return ret;
 }
 
-int asix_set_hw_mii(struct usbnet *dev)
+int asix_set_hw_mii(struct usbnet *dev, int in_pm)
 {
        int ret;
-       ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm);
        if (ret < 0)
                netdev_err(dev->net, "Failed to enable hardware MII access\n");
        return ret;
@@ -247,7 +273,7 @@ int asix_read_phy_addr(struct usbnet *dev, int internal)
 {
        int offset = (internal ? 1 : 0);
        u8 buf[2];
-       int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf);
+       int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0);
 
        netdev_dbg(dev->net, "asix_get_phy_addr()\n");
 
@@ -270,21 +296,21 @@ int asix_get_phy_addr(struct usbnet *dev)
 }
 
 
-int asix_sw_reset(struct usbnet *dev, u8 flags)
+int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm)
 {
        int ret;
 
-        ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm);
        if (ret < 0)
                netdev_err(dev->net, "Failed to send software reset: %02x\n", ret);
 
        return ret;
 }
 
-u16 asix_read_rx_ctl(struct usbnet *dev)
+u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm)
 {
        __le16 v;
-       int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v);
+       int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm);
 
        if (ret < 0) {
                netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret);
@@ -295,12 +321,12 @@ out:
        return ret;
 }
 
-int asix_write_rx_ctl(struct usbnet *dev, u16 mode)
+int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm)
 {
        int ret;
 
        netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode);
-       ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm);
        if (ret < 0)
                netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n",
                           mode, ret);
@@ -308,10 +334,11 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode)
        return ret;
 }
 
-u16 asix_read_medium_status(struct usbnet *dev)
+u16 asix_read_medium_status(struct usbnet *dev, int in_pm)
 {
        __le16 v;
-       int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v);
+       int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS,
+                               0, 0, 2, &v, in_pm);
 
        if (ret < 0) {
                netdev_err(dev->net, "Error reading Medium Status register: %02x\n",
@@ -323,12 +350,13 @@ u16 asix_read_medium_status(struct usbnet *dev)
 
 }
 
-int asix_write_medium_mode(struct usbnet *dev, u16 mode)
+int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm)
 {
        int ret;
 
        netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode);
-       ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE,
+                            mode, 0, 0, NULL, in_pm);
        if (ret < 0)
                netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n",
                           mode, ret);
@@ -336,12 +364,12 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode)
        return ret;
 }
 
-int asix_write_gpio(struct usbnet *dev, u16 value, int sleep)
+int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
 {
        int ret;
 
        netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value);
-       ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm);
        if (ret < 0)
                netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n",
                           value, ret);
@@ -398,16 +426,31 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc)
 {
        struct usbnet *dev = netdev_priv(netdev);
        __le16 res;
+       u8 smsr;
+       int i = 0;
+       int ret;
 
        mutex_lock(&dev->phy_mutex);
-       asix_set_sw_mii(dev);
+       do {
+               ret = asix_set_sw_mii(dev, 0);
+               if (ret == -ENODEV)
+                       break;
+               usleep_range(1000, 1100);
+               ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+                                   0, 0, 1, &smsr, 0);
+       } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+       if (ret == -ENODEV) {
+               mutex_unlock(&dev->phy_mutex);
+               return ret;
+       }
+
        asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
-                               (__u16)loc, 2, &res);
-       asix_set_hw_mii(dev);
+                               (__u16)loc, 2, &res, 0);
+       asix_set_hw_mii(dev, 0);
        mutex_unlock(&dev->phy_mutex);
 
        netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
-                  phy_id, loc, le16_to_cpu(res));
+                       phy_id, loc, le16_to_cpu(res));
 
        return le16_to_cpu(res);
 }
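
The do/while above polls the Host-EN status bit after switching to software MII access, since the switch is not instantaneous; the same loop is repeated verbatim in the three functions that follow. A sketch of how it could be factored into one bounded wait (hypothetical helper; it also pre-initialises smsr, which the open-coded loops test even when the status read itself failed):

#include <linux/delay.h>
#include <linux/errno.h>
#include "asix.h"

static int example_wait_host_en(struct usbnet *dev, int in_pm)
{
	u8 smsr = 0;
	int i, ret;

	for (i = 0; i < 30; i++) {
		ret = asix_set_sw_mii(dev, in_pm);
		if (ret == -ENODEV)
			return ret;
		usleep_range(1000, 1100);
		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
				    0, 0, 1, &smsr, in_pm);
		if (ret == -ENODEV)
			return ret;
		if (smsr & AX_HOST_EN)
			return 0;
	}
	return -ETIMEDOUT;
}
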
@@ -416,13 +459,95 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
 {
        struct usbnet *dev = netdev_priv(netdev);
        __le16 res = cpu_to_le16(val);
+       u8 smsr;
+       int i = 0;
+       int ret;
 
        netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
-                  phy_id, loc, val);
+                       phy_id, loc, val);
+
        mutex_lock(&dev->phy_mutex);
-       asix_set_sw_mii(dev);
-       asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res);
-       asix_set_hw_mii(dev);
+       do {
+               ret = asix_set_sw_mii(dev, 0);
+               if (ret == -ENODEV)
+                       break;
+               usleep_range(1000, 1100);
+               ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+                                   0, 0, 1, &smsr, 0);
+       } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+       if (ret == -ENODEV) {
+               mutex_unlock(&dev->phy_mutex);
+               return;
+       }
+
+       asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id,
+                      (__u16)loc, 2, &res, 0);
+       asix_set_hw_mii(dev, 0);
+       mutex_unlock(&dev->phy_mutex);
+}
+
+int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
+{
+       struct usbnet *dev = netdev_priv(netdev);
+       __le16 res;
+       u8 smsr;
+       int i = 0;
+       int ret;
+
+       mutex_lock(&dev->phy_mutex);
+       do {
+               ret = asix_set_sw_mii(dev, 1);
+               if (ret == -ENODEV)
+                       break;
+               usleep_range(1000, 1100);
+               ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+                                   0, 0, 1, &smsr, 1);
+       } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+       if (ret == -ENODEV) {
+               mutex_unlock(&dev->phy_mutex);
+               return ret;
+       }
+
+       asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
+                     (__u16)loc, 2, &res, 1);
+       asix_set_hw_mii(dev, 1);
+       mutex_unlock(&dev->phy_mutex);
+
+       netdev_dbg(dev->net, "asix_mdio_read_nopm() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
+                       phy_id, loc, le16_to_cpu(res));
+
+       return le16_to_cpu(res);
+}
+
+void
+asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val)
+{
+       struct usbnet *dev = netdev_priv(netdev);
+       __le16 res = cpu_to_le16(val);
+       u8 smsr;
+       int i = 0;
+       int ret;
+
+       netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
+                       phy_id, loc, val);
+
+       mutex_lock(&dev->phy_mutex);
+       do {
+               ret = asix_set_sw_mii(dev, 1);
+               if (ret == -ENODEV)
+                       break;
+               usleep_range(1000, 1100);
+               ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+                                   0, 0, 1, &smsr, 1);
+       } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+       if (ret == -ENODEV) {
+               mutex_unlock(&dev->phy_mutex);
+               return;
+       }
+
+       asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id,
+                      (__u16)loc, 2, &res, 1);
+       asix_set_hw_mii(dev, 1);
        mutex_unlock(&dev->phy_mutex);
 }
 
@@ -431,7 +556,8 @@ void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo)
        struct usbnet *dev = netdev_priv(net);
        u8 opt;
 
-       if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt) < 0) {
+       if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE,
+                         0, 0, 1, &opt, 0) < 0) {
                wolinfo->supported = 0;
                wolinfo->wolopts = 0;
                return;
@@ -455,7 +581,7 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo)
                opt |= AX_MONITOR_MAGIC;
 
        if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE,
-                             opt, 0, 0, NULL) < 0)
+                             opt, 0, 0, NULL, 0) < 0)
                return -EINVAL;
 
        return 0;
@@ -490,7 +616,7 @@ int asix_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
        /* ax8817x returns 2 bytes from eeprom on read */
        for (i = first_word; i <= last_word; i++) {
                if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2,
-                                 &(eeprom_buff[i - first_word])) < 0) {
+                                 &eeprom_buff[i - first_word], 0) < 0) {
                        kfree(eeprom_buff);
                        return -EIO;
                }
@@ -531,7 +657,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
           the EEPROM */
        if (eeprom->offset & 1) {
                ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2,
-                                   &(eeprom_buff[0]));
+                                   &eeprom_buff[0], 0);
                if (ret < 0) {
                        netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word);
                        goto free;
@@ -540,7 +666,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
 
        if ((eeprom->offset + eeprom->len) & 1) {
                ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2,
-                                   &(eeprom_buff[last_word - first_word]));
+                                   &eeprom_buff[last_word - first_word], 0);
                if (ret < 0) {
                        netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word);
                        goto free;
@@ -550,7 +676,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
        memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len);
 
        /* write data to EEPROM */
-       ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0);
        if (ret < 0) {
                netdev_err(net, "Failed to enable EEPROM write\n");
                goto free;
@@ -561,7 +687,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
                netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n",
                           i, eeprom_buff[i - first_word]);
                ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i,
-                                    eeprom_buff[i - first_word], 0, NULL);
+                                    eeprom_buff[i - first_word], 0, NULL, 0);
                if (ret < 0) {
                        netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n",
                                   i);
@@ -570,7 +696,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
                msleep(20);
        }
 
-       ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0);
        if (ret < 0) {
                netdev_err(net, "Failed to disable EEPROM write\n");
                goto free;
index 5cabefc..cce2495 100644 (file)
 
 #define        PHY_MODE_RTL8211CL      0x000C
 
+#define AX88772A_PHY14H                0x14
+#define AX88772A_PHY14H_DEFAULT 0x442C
+
+#define AX88772A_PHY15H                0x15
+#define AX88772A_PHY15H_DEFAULT 0x03C8
+
+#define AX88772A_PHY16H                0x16
+#define AX88772A_PHY16H_DEFAULT 0x4044
+
 struct ax88172_int_data {
        __le16 res1;
        u8 link;
@@ -79,6 +88,8 @@ static u32 asix_get_phyid(struct usbnet *dev)
        /* Poll for the rare case the FW or phy isn't ready yet.  */
        for (i = 0; i < 100; i++) {
                phy_reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID1);
+               if (phy_reg < 0)
+                       return 0;
                if (phy_reg != 0 && phy_reg != 0xFFFF)
                        break;
                mdelay(1);
@@ -184,7 +195,7 @@ static int ax88172_link_reset(struct usbnet *dev)
        netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
                   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
-       asix_write_medium_mode(dev, mode);
+       asix_write_medium_mode(dev, mode, 0);
 
        return 0;
 }
@@ -201,6 +212,28 @@ static const struct net_device_ops ax88172_netdev_ops = {
        .ndo_set_rx_mode        = ax88172_set_multicast,
 };
 
+static void asix_phy_reset(struct usbnet *dev, unsigned int reset_bits)
+{
+       unsigned int timeout = 5000;
+
+       asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, reset_bits);
+
+       /* give phy_id a chance to process reset */
+       udelay(500);
+
+       /* See IEEE 802.3 "22.2.4.1.1 Reset": 500ms max */
+       while (timeout--) {
+               if (asix_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR)
+                                                       & BMCR_RESET)
+                       udelay(100);
+               else
+                       return;
+       }
+
+       netdev_err(dev->net, "BMCR_RESET timeout on phy_id %d\n",
+                  dev->mii.phy_id);
+}
+
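
asix_phy_reset() encodes the IEEE 802.3 clause 22 rule its comment cites: BMCR_RESET is self-clearing and must read back as zero within 500 ms, so the loop budgets 5000 polls of 100 us each after an initial settle delay. The same wait as a standalone bounded poll (sketch; returning -ETIMEDOUT rather than just logging is an assumption):

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/mii.h>
#include "asix.h"

static int example_wait_bmcr_reset(struct net_device *ndev, int phy_id)
{
	unsigned int timeout = 5000;	/* 5000 * 100 us = 500 ms max */

	while (timeout--) {
		if (!(asix_mdio_read(ndev, phy_id, MII_BMCR) & BMCR_RESET))
			return 0;	/* reset bit self-cleared */
		udelay(100);
	}
	return -ETIMEDOUT;
}
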
 static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
 {
        int ret = 0;
@@ -213,18 +246,19 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
        /* Toggle the GPIOs in a manufacturer/model specific way */
        for (i = 2; i >= 0; i--) {
                ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS,
-                               (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL);
+                               (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL, 0);
                if (ret < 0)
                        goto out;
                msleep(5);
        }
 
-       ret = asix_write_rx_ctl(dev, 0x80);
+       ret = asix_write_rx_ctl(dev, 0x80, 0);
        if (ret < 0)
                goto out;
 
        /* Get the MAC address */
-       ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+       ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID,
+                           0, 0, ETH_ALEN, buf, 0);
        if (ret < 0) {
                netdev_dbg(dev->net, "read AX_CMD_READ_NODE_ID failed: %d\n",
                           ret);
@@ -246,7 +280,7 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
        dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
 
-       asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
+       asix_phy_reset(dev, BMCR_RESET);
        asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
                ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
        mii_nway_restart(&dev->mii);
@@ -290,86 +324,226 @@ static int ax88772_link_reset(struct usbnet *dev)
        netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
                   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
-       asix_write_medium_mode(dev, mode);
+       asix_write_medium_mode(dev, mode, 0);
 
        return 0;
 }
 
 static int ax88772_reset(struct usbnet *dev)
+{
+       struct asix_data *data = (struct asix_data *)&dev->data;
+       int ret;
+
+       /* Rewrite MAC address */
+       ether_addr_copy(data->mac_addr, dev->net->dev_addr);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0,
+                            ETH_ALEN, data->mac_addr, 0);
+       if (ret < 0)
+               goto out;
+
+       /* Set RX_CTL to default values with 2k buffer, and enable cactus */
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
+       if (ret < 0)
+               goto out;
+
+       asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0);
+       if (ret < 0)
+               goto out;
+
+       return 0;
+
+out:
+       return ret;
+}
+
+static int ax88772_hw_reset(struct usbnet *dev, int in_pm)
 {
        struct asix_data *data = (struct asix_data *)&dev->data;
        int ret, embd_phy;
        u16 rx_ctl;
 
-       ret = asix_write_gpio(dev,
-                       AX_GPIO_RSE | AX_GPIO_GPO_2 | AX_GPIO_GPO2EN, 5);
+       ret = asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_2 |
+                             AX_GPIO_GPO2EN, 5, in_pm);
        if (ret < 0)
                goto out;
 
-       embd_phy = ((asix_get_phy_addr(dev) & 0x1f) == 0x10 ? 1 : 0);
+       embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
 
-       ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
+       ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy,
+                            0, 0, NULL, in_pm);
        if (ret < 0) {
                netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
                goto out;
        }
 
-       ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL);
-       if (ret < 0)
-               goto out;
+       if (embd_phy) {
+               ret = asix_sw_reset(dev, AX_SWRESET_IPPD, in_pm);
+               if (ret < 0)
+                       goto out;
 
-       msleep(150);
+               usleep_range(10000, 11000);
 
-       ret = asix_sw_reset(dev, AX_SWRESET_CLEAR);
-       if (ret < 0)
-               goto out;
+               ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm);
+               if (ret < 0)
+                       goto out;
 
-       msleep(150);
+               msleep(60);
 
-       if (embd_phy) {
-               ret = asix_sw_reset(dev, AX_SWRESET_IPRL);
+               ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL,
+                                   in_pm);
                if (ret < 0)
                        goto out;
        } else {
-               ret = asix_sw_reset(dev, AX_SWRESET_PRTE);
+               ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL,
+                                   in_pm);
                if (ret < 0)
                        goto out;
        }
 
        msleep(150);
-       rx_ctl = asix_read_rx_ctl(dev);
-       netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl);
-       ret = asix_write_rx_ctl(dev, 0x0000);
+
+       if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+                                          MII_PHYSID1))){
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
        if (ret < 0)
                goto out;
 
-       rx_ctl = asix_read_rx_ctl(dev);
-       netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl);
+       ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm);
+       if (ret < 0)
+               goto out;
+
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
+                            AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
+                            AX88772_IPG2_DEFAULT, 0, NULL, in_pm);
+       if (ret < 0) {
+               netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
+               goto out;
+       }
+
+       /* Rewrite MAC address */
+       ether_addr_copy(data->mac_addr, dev->net->dev_addr);
+       ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0,
+                            ETH_ALEN, data->mac_addr, in_pm);
+       if (ret < 0)
+               goto out;
 
-       ret = asix_sw_reset(dev, AX_SWRESET_PRL);
+       /* Set RX_CTL to default values with 2k buffer, and enable cactus */
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
        if (ret < 0)
                goto out;
 
-       msleep(150);
+       rx_ctl = asix_read_rx_ctl(dev, in_pm);
+       netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
+                  rx_ctl);
 
-       ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL);
+       rx_ctl = asix_read_medium_status(dev, in_pm);
+       netdev_dbg(dev->net,
+                  "Medium Status is 0x%04x after all initializations\n",
+                  rx_ctl);
+
+       return 0;
+
+out:
+       return ret;
+}
+
+static int ax88772a_hw_reset(struct usbnet *dev, int in_pm)
+{
+       struct asix_data *data = (struct asix_data *)&dev->data;
+       int ret, embd_phy;
+       u16 rx_ctl, phy14h, phy15h, phy16h;
+       u8 chipcode = 0;
+
+       ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm);
        if (ret < 0)
                goto out;
 
-       msleep(150);
+       embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
 
-       asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
-       asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
-                       ADVERTISE_ALL | ADVERTISE_CSMA);
-       mii_nway_restart(&dev->mii);
+       ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy |
+                            AX_PHYSEL_SSEN, 0, 0, NULL, in_pm);
+       if (ret < 0) {
+               netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
+               goto out;
+       }
+       usleep_range(10000, 11000);
+
+       ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_IPRL, in_pm);
+       if (ret < 0)
+               goto out;
+
+       usleep_range(10000, 11000);
+
+       ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm);
+       if (ret < 0)
+               goto out;
+
+       msleep(160);
+
+       ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm);
+       if (ret < 0)
+               goto out;
 
-       ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT);
+       ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm);
        if (ret < 0)
                goto out;
 
+       msleep(200);
+
+       if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+                                          MII_PHYSID1))) {
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0,
+                           0, 1, &chipcode, in_pm);
+       if (ret < 0)
+               goto out;
+
+       if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772B_CHIPCODE) {
+               ret = asix_write_cmd(dev, AX_QCTCTRL, 0x8000, 0x8001,
+                                    0, NULL, in_pm);
+               if (ret < 0) {
+                       netdev_dbg(dev->net, "Write BQ setting failed: %d\n",
+                                  ret);
+                       goto out;
+               }
+       } else if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772A_CHIPCODE) {
+               /* Check if the PHY registers have default settings */
+               phy14h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY14H);
+               phy15h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY15H);
+               phy16h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY16H);
+
+               netdev_dbg(dev->net,
+                          "772a_hw_reset: MR20=0x%x MR21=0x%x MR22=0x%x\n",
+                          phy14h, phy15h, phy16h);
+
+               /* Restore PHY registers default setting if not */
+               if (phy14h != AX88772A_PHY14H_DEFAULT)
+                       asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY14H,
+                                            AX88772A_PHY14H_DEFAULT);
+               if (phy15h != AX88772A_PHY15H_DEFAULT)
+                       asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY15H,
+                                            AX88772A_PHY15H_DEFAULT);
+               if (phy16h != AX88772A_PHY16H_DEFAULT)
+                       asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+                                            AX88772A_PHY16H,
+                                            AX88772A_PHY16H_DEFAULT);
+       }
+
        ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
                                AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
-                               AX88772_IPG2_DEFAULT, 0, NULL);
+                               AX88772_IPG2_DEFAULT, 0, NULL, in_pm);
        if (ret < 0) {
                netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
                goto out;
@@ -378,20 +552,29 @@ static int ax88772_reset(struct usbnet *dev)
        /* Rewrite MAC address */
        memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
        ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-                                                       data->mac_addr);
+                                                       data->mac_addr, in_pm);
        if (ret < 0)
                goto out;
 
        /* Set RX_CTL to default values with 2k buffer, and enable cactus */
-       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
        if (ret < 0)
                goto out;
 
-       rx_ctl = asix_read_rx_ctl(dev);
+       ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm);
+       if (ret < 0)
+               goto out;
+
+       /* Set RX_CTL to default values with 2k buffer, and enable cactus */
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
+       if (ret < 0)
+               goto out;
+
+       rx_ctl = asix_read_rx_ctl(dev, in_pm);
        netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
                   rx_ctl);
 
-       rx_ctl = asix_read_medium_status(dev);
+       rx_ctl = asix_read_medium_status(dev, in_pm);
        netdev_dbg(dev->net,
                   "Medium Status is 0x%04x after all initializations\n",
                   rx_ctl);
@@ -400,7 +583,6 @@ static int ax88772_reset(struct usbnet *dev)
 
 out:
        return ret;
-
 }
 
 static const struct net_device_ops ax88772_netdev_ops = {
@@ -415,11 +597,97 @@ static const struct net_device_ops ax88772_netdev_ops = {
        .ndo_set_rx_mode        = asix_set_multicast,
 };
 
+static void ax88772_suspend(struct usbnet *dev)
+{
+       struct asix_common_private *priv = dev->driver_priv;
+       u16 medium;
+
+       /* Stop MAC operation */
+       medium = asix_read_medium_status(dev, 0);
+       medium &= ~AX_MEDIUM_RE;
+       asix_write_medium_mode(dev, medium, 0);
+
+       netdev_dbg(dev->net, "ax88772_suspend: medium=0x%04x\n",
+                  asix_read_medium_status(dev, 0));
+
+       /* Preserve BMCR for restoring */
+       priv->presvd_phy_bmcr =
+               asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_BMCR);
+
+       /* Preserve ANAR for restoring */
+       priv->presvd_phy_advertise =
+               asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE);
+}
+
+static int asix_suspend(struct usb_interface *intf, pm_message_t message)
+{
+       struct usbnet *dev = usb_get_intfdata(intf);
+       struct asix_common_private *priv = dev->driver_priv;
+
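+       /* run the chip-specific suspend hook before the generic usbnet suspend */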
+       if (priv->suspend)
+               priv->suspend(dev);
+
+       return usbnet_suspend(intf, message);
+}
+
+static void ax88772_restore_phy(struct usbnet *dev)
+{
+       struct asix_common_private *priv = dev->driver_priv;
+
+       if (priv->presvd_phy_advertise) {
+               /* Restore Advertisement control reg */
+               asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE,
+                                    priv->presvd_phy_advertise);
+
+               /* Restore BMCR */
+               asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR,
+                                    priv->presvd_phy_bmcr);
+
+               mii_nway_restart(&dev->mii);
+               priv->presvd_phy_advertise = 0;
+               priv->presvd_phy_bmcr = 0;
+       }
+}
+
+static void ax88772_resume(struct usbnet *dev)
+{
+       int i;
+
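+       /* retry the hardware reset a few times before restoring the PHY */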
+       for (i = 0; i < 3; i++)
+               if (!ax88772_hw_reset(dev, 1))
+                       break;
+       ax88772_restore_phy(dev);
+}
+
+static void ax88772a_resume(struct usbnet *dev)
+{
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (!ax88772a_hw_reset(dev, 1))
+                       break;
+       }
+
+       ax88772_restore_phy(dev);
+}
+
+static int asix_resume(struct usb_interface *intf)
+{
+       struct usbnet *dev = usb_get_intfdata(intf);
+       struct asix_common_private *priv = dev->driver_priv;
+
+       if (priv->resume)
+               priv->resume(dev);
+
+       return usbnet_resume(intf);
+}
+
 static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-       int ret, embd_phy, i;
-       u8 buf[ETH_ALEN];
+       int ret, i;
+       u8 buf[ETH_ALEN], chipcode = 0;
        u32 phyid;
+       struct asix_common_private *priv;
 
        usbnet_get_endpoints(dev,intf);
 
@@ -427,13 +695,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
        if (dev->driver_info->data & FLAG_EEPROM_MAC) {
                for (i = 0; i < (ETH_ALEN >> 1); i++) {
                        ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x04 + i,
-                                       0, 2, buf + i * 2);
+                                           0, 2, buf + i * 2, 0);
                        if (ret < 0)
                                break;
                }
        } else {
                ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID,
-                               0, 0, ETH_ALEN, buf);
+                               0, 0, ETH_ALEN, buf, 0);
        }
 
        if (ret < 0) {
@@ -456,16 +724,11 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
        dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
 
-       embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
-
-       /* Reset the PHY to normal operation mode */
-       ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
-       if (ret < 0) {
-               netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
-               return ret;
-       }
+       asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
+       chipcode &= AX_CHIPCODE_MASK;
 
-       ax88772_reset(dev);
+       if (chipcode == AX_AX88772_CHIPCODE)
+               ax88772_hw_reset(dev, 0);
+       else
+               ax88772a_hw_reset(dev, 0);
 
        /* Read PHYID register *AFTER* the PHY was reset properly */
        phyid = asix_get_phyid(dev);
@@ -482,6 +745,18 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
        if (!dev->driver_priv)
                return -ENOMEM;
 
+       priv = dev->driver_priv;
+
+       priv->presvd_phy_bmcr = 0;
+       priv->presvd_phy_advertise = 0;
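+       /* pick the chip-specific resume handler; suspend is shared */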
+       if (chipcode == AX_AX88772_CHIPCODE) {
+               priv->resume = ax88772_resume;
+               priv->suspend = ax88772_suspend;
+       } else {
+               priv->resume = ax88772a_resume;
+               priv->suspend = ax88772_suspend;
+       }
+
        return 0;
 }
 
@@ -593,12 +868,12 @@ static int ax88178_reset(struct usbnet *dev)
        int gpio0 = 0;
        u32 phyid;
 
-       asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status);
+       asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
        netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status);
 
-       asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL);
-       asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom);
-       asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL);
+       asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0);
+       asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
+       asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0);
 
        netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom);
 
@@ -614,15 +889,16 @@ static int ax88178_reset(struct usbnet *dev)
        netdev_dbg(dev->net, "GPIO0: %d, PhyMode: %d\n", gpio0, data->phymode);
 
        /* Power up external GigaPHY through AX88178 GPIO pin */
-       asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | AX_GPIO_GPO1EN, 40);
+       asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 |
+                       AX_GPIO_GPO1EN, 40, 0);
        if ((le16_to_cpu(eeprom) >> 8) != 1) {
-               asix_write_gpio(dev, 0x003c, 30);
-               asix_write_gpio(dev, 0x001c, 300);
-               asix_write_gpio(dev, 0x003c, 30);
+               asix_write_gpio(dev, 0x003c, 30, 0);
+               asix_write_gpio(dev, 0x001c, 300, 0);
+               asix_write_gpio(dev, 0x003c, 30, 0);
        } else {
                netdev_dbg(dev->net, "gpio phymode == 1 path\n");
-               asix_write_gpio(dev, AX_GPIO_GPO1EN, 30);
-               asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30);
+               asix_write_gpio(dev, AX_GPIO_GPO1EN, 30, 0);
+               asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30, 0);
        }
 
        /* Read PHYID register *AFTER* powering up PHY */
@@ -630,15 +906,15 @@ static int ax88178_reset(struct usbnet *dev)
        netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
 
        /* Set AX88178 to enable MII/GMII/RGMII interface for external PHY */
-       asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL);
+       asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL, 0);
 
-       asix_sw_reset(dev, 0);
+       asix_sw_reset(dev, 0, 0);
        msleep(150);
 
-       asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD);
+       asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0);
        msleep(150);
 
-       asix_write_rx_ctl(dev, 0);
+       asix_write_rx_ctl(dev, 0, 0);
 
        if (data->phymode == PHY_MODE_MARVELL) {
                marvell_phy_init(dev);
@@ -646,27 +922,23 @@ static int ax88178_reset(struct usbnet *dev)
        } else if (data->phymode == PHY_MODE_RTL8211CL)
                rtl8211cl_phy_init(dev);
 
-       asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR,
-                       BMCR_RESET | BMCR_ANENABLE);
+       asix_phy_reset(dev, BMCR_RESET | BMCR_ANENABLE);
        asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
                        ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
        asix_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000,
                        ADVERTISE_1000FULL);
 
+       asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0);
        mii_nway_restart(&dev->mii);
 
-       ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT);
-       if (ret < 0)
-               return ret;
-
        /* Rewrite MAC address */
        memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
        ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-                                                       data->mac_addr);
+                                                       data->mac_addr, 0);
        if (ret < 0)
                return ret;
 
-       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
        if (ret < 0)
                return ret;
 
@@ -704,7 +976,7 @@ static int ax88178_link_reset(struct usbnet *dev)
        netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
                   speed, ecmd.duplex, mode);
 
-       asix_write_medium_mode(dev, mode);
+       asix_write_medium_mode(dev, mode, 0);
 
        if (data->phymode == PHY_MODE_MARVELL && data->ledmode)
                marvell_led_status(dev, speed);
@@ -733,15 +1005,15 @@ static void ax88178_set_mfb(struct usbnet *dev)
                mfb = AX_RX_CTL_MFB_16384;
        }
 
-       rxctl = asix_read_rx_ctl(dev);
-       asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb);
+       rxctl = asix_read_rx_ctl(dev, 0);
+       asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb, 0);
 
-       medium = asix_read_medium_status(dev);
+       medium = asix_read_medium_status(dev, 0);
        if (dev->net->mtu > 1500)
                medium |= AX_MEDIUM_JFE;
        else
                medium &= ~AX_MEDIUM_JFE;
-       asix_write_medium_mode(dev, medium);
+       asix_write_medium_mode(dev, medium, 0);
 
        if (dev->rx_urb_size > old_rx_urb_size)
                usbnet_unlink_rx_urbs(dev);
@@ -790,7 +1062,7 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
        usbnet_get_endpoints(dev,intf);
 
        /* Get the MAC address */
-       ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+       ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
        if (ret < 0) {
                netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret);
                return ret;
@@ -811,10 +1083,10 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->ethtool_ops = &ax88178_ethtool_ops;
 
        /* Blink LEDS so users know driver saw dongle */
-       asix_sw_reset(dev, 0);
+       asix_sw_reset(dev, 0, 0);
        msleep(150);
 
-       asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD);
+       asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0);
        msleep(150);
 
        /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */
@@ -877,7 +1149,7 @@ static const struct driver_info ax88772_info = {
        .unbind = ax88772_unbind,
        .status = asix_status,
        .link_reset = ax88772_link_reset,
-       .reset = ax88772_link_reset,
+       .reset = ax88772_reset,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET,
        .rx_fixup = asix_rx_fixup_common,
        .tx_fixup = asix_tx_fixup,
@@ -1005,7 +1277,7 @@ static const struct usb_device_id products [] = {
 }, {
        // Lenovo U2L100P 10/100
        USB_DEVICE (0x17ef, 0x7203),
-       .driver_info = (unsigned long) &ax88772_info,
+       .driver_info = (unsigned long)&ax88772b_info,
 }, {
        // ASIX AX88772B 10/100
        USB_DEVICE (0x0b95, 0x772b),
@@ -1073,7 +1345,7 @@ static const struct usb_device_id products [] = {
 }, {
        // Asus USB Ethernet Adapter
        USB_DEVICE (0x0b95, 0x7e2b),
-       .driver_info = (unsigned long) &ax88772_info,
+       .driver_info = (unsigned long)&ax88772b_info,
 }, {
        /* ASIX 88172a demo board */
        USB_DEVICE(0x0b95, 0x172a),
@@ -1095,8 +1367,8 @@ static struct usb_driver asix_driver = {
        .name =         DRIVER_NAME,
        .id_table =     products,
        .probe =        usbnet_probe,
-       .suspend =      usbnet_suspend,
-       .resume =       usbnet_resume,
+       .suspend =      asix_suspend,
+       .resume =       asix_resume,
        .disconnect =   usbnet_disconnect,
        .supports_autosuspend = 1,
        .disable_hub_initiated_lpm = 1,
index 163a2c5..49a3bc1 100644 (file)
@@ -81,7 +81,7 @@ static void ax88172a_adjust_link(struct net_device *netdev)
        }
 
        if (mode != priv->oldmode) {
-               asix_write_medium_mode(dev, mode);
+               asix_write_medium_mode(dev, mode, 0);
                priv->oldmode = mode;
                netdev_dbg(netdev, "speed %u duplex %d, setting mode to 0x%04x\n",
                           phydev->speed, phydev->duplex, mode);
@@ -176,18 +176,19 @@ static int ax88172a_reset_phy(struct usbnet *dev, int embd_phy)
 {
        int ret;
 
-       ret = asix_sw_reset(dev, AX_SWRESET_IPPD);
+       ret = asix_sw_reset(dev, AX_SWRESET_IPPD, 0);
        if (ret < 0)
                goto err;
 
        msleep(150);
-       ret = asix_sw_reset(dev, AX_SWRESET_CLEAR);
+       ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, 0);
        if (ret < 0)
                goto err;
 
        msleep(150);
 
-       ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD);
+       ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD,
+                           0);
        if (ret < 0)
                goto err;
 
@@ -213,7 +214,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->driver_priv = priv;
 
        /* Get the MAC address */
-       ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+       ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
        if (ret < 0) {
                netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
                goto free;
@@ -224,7 +225,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->ethtool_ops = &ax88172a_ethtool_ops;
 
        /* are we using the internal or the external phy? */
-       ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf);
+       ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf, 0);
        if (ret < 0) {
                netdev_err(dev->net, "Failed to read software interface selection register: %d\n",
                           ret);
@@ -303,20 +304,20 @@ static int ax88172a_reset(struct usbnet *dev)
        ax88172a_reset_phy(dev, priv->use_embdphy);
 
        msleep(150);
-       rx_ctl = asix_read_rx_ctl(dev);
+       rx_ctl = asix_read_rx_ctl(dev, 0);
        netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl);
-       ret = asix_write_rx_ctl(dev, 0x0000);
+       ret = asix_write_rx_ctl(dev, 0x0000, 0);
        if (ret < 0)
                goto out;
 
-       rx_ctl = asix_read_rx_ctl(dev);
+       rx_ctl = asix_read_rx_ctl(dev, 0);
        netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl);
 
        msleep(150);
 
        ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
                             AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
-                            AX88772_IPG2_DEFAULT, 0, NULL);
+                            AX88772_IPG2_DEFAULT, 0, NULL, 0);
        if (ret < 0) {
                netdev_err(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
                goto out;
@@ -325,20 +326,20 @@ static int ax88172a_reset(struct usbnet *dev)
        /* Rewrite MAC address */
        memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
        ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-                            data->mac_addr);
+                            data->mac_addr, 0);
        if (ret < 0)
                goto out;
 
        /* Set RX_CTL to default values with 2k buffer, and enable cactus */
-       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+       ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
        if (ret < 0)
                goto out;
 
-       rx_ctl = asix_read_rx_ctl(dev);
+       rx_ctl = asix_read_rx_ctl(dev, 0);
        netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
                   rx_ctl);
 
-       rx_ctl = asix_read_medium_status(dev);
+       rx_ctl = asix_read_medium_status(dev, 0);
        netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n",
                   rx_ctl);
 
index c5544d3..e7b5163 100644 (file)
@@ -50,6 +50,8 @@
  *
  *****************************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 /*****************************************************************************/
 /* Debugging functions                                                       */
 /*****************************************************************************/
-#define D__(lvl_, fmt, arg...)                         \
-       do {                                            \
-               printk(lvl_ "[%d:%s]: " fmt "\n",       \
-                      __LINE__, __func__, ## arg);     \
-       } while (0)
-
-#define D_(lvl, args...)                               \
-       do {                                            \
-               if (lvl & debug)                        \
-                       D__(KERN_INFO, args);           \
-       } while (0)
-
-#define D1(args...)    D_(0x01, ##args)
-#define D2(args...)    D_(0x02, ##args)
-#define D3(args...)    D_(0x04, ##args)
-#define D4(args...)    D_(0x08, ##args)
-#define D5(args...)    D_(0x10, ##args)
+#define hso_dbg(lvl, fmt, ...)                                         \
+do {                                                                   \
+       if ((lvl) & debug)                                              \
+               pr_info("[%d:%s] " fmt,                                 \
+                       __LINE__, __func__, ##__VA_ARGS__);             \
+} while (0)
 
 /*****************************************************************************/
 /* Enumerators                                                               */
@@ -649,7 +640,7 @@ static int get_free_serial_index(void)
        }
        spin_unlock_irqrestore(&serial_table_lock, flags);
 
-       printk(KERN_ERR "%s: no free serial devices in table\n", __func__);
+       pr_err("%s: no free serial devices in table\n", __func__);
        return -1;
 }
 
@@ -709,7 +700,8 @@ static void handle_usb_error(int status, const char *function,
        }
 
        /* log a meaningful explanation of a USB status */
-       D1("%s: received USB status - %s (%d)", function, explanation, status);
+       hso_dbg(0x1, "%s: received USB status - %s (%d)\n",
+               function, explanation, status);
 }
 
 /* Network interface functions */
@@ -808,7 +800,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb,
        DUMP1(skb->data, skb->len);
        /* Copy it from kernel memory to OUR memory */
        memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len);
-       D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE);
+       hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE);
 
        /* Fill in the URB for shipping it out. */
        usb_fill_bulk_urb(odev->mux_bulk_tx_urb,
@@ -872,7 +864,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
        unsigned char *tmp_rx_buf;
 
        /* log if needed */
-       D1("Rx %d bytes", count);
+       hso_dbg(0x1, "Rx %d bytes\n", count);
        DUMP(ip_pkt, min(128, (int)count));
 
        while (count) {
@@ -912,7 +904,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
                                                                    frame_len);
                                if (!odev->skb_rx_buf) {
                                        /* We got no receive buffer. */
-                                       D1("could not allocate memory");
+                                       hso_dbg(0x1, "could not allocate memory\n");
                                        odev->rx_parse_state = WAIT_SYNC;
                                        continue;
                                }
@@ -972,11 +964,11 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt,
                        break;
 
                case WAIT_SYNC:
-                       D1(" W_S");
+                       hso_dbg(0x1, " W_S\n");
                        count = 0;
                        break;
                default:
-                       D1(" ");
+                       hso_dbg(0x1, "\n");
                        count--;
                        break;
                }
@@ -1020,7 +1012,7 @@ static void read_bulk_callback(struct urb *urb)
 
        /* Sanity check */
        if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) {
-               D1("BULK IN callback but driver is not active!");
+               hso_dbg(0x1, "BULK IN callback but driver is not active!\n");
                return;
        }
        usb_mark_last_busy(urb->dev);
@@ -1112,11 +1104,11 @@ static void _hso_serial_set_termios(struct tty_struct *tty,
        struct hso_serial *serial = tty->driver_data;
 
        if (!serial) {
-               printk(KERN_ERR "%s: no tty structures", __func__);
+               pr_err("%s: no tty structures\n", __func__);
                return;
        }
 
-       D4("port %d", serial->minor);
+       hso_dbg(0x8, "port %d\n", serial->minor);
 
        /*
         *      Fix up unsupported bits
@@ -1205,11 +1197,11 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
        struct hso_serial *serial = urb->context;
        int status = urb->status;
 
-       D4("\n--- Got serial_read_bulk callback %02x ---", status);
+       hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status);
 
        /* sanity check */
        if (!serial) {
-               D1("serial == NULL");
+               hso_dbg(0x1, "serial == NULL\n");
                return;
        }
        if (status) {
@@ -1217,7 +1209,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb)
                return;
        }
 
-       D1("Actual length = %d\n", urb->actual_length);
+       hso_dbg(0x1, "Actual length = %d\n", urb->actual_length);
        DUMP1(urb->transfer_buffer, urb->actual_length);
 
        /* Anyone listening? */
@@ -1266,7 +1258,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
        if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) {
                WARN_ON(1);
                tty->driver_data = NULL;
-               D1("Failed to open port");
+               hso_dbg(0x1, "Failed to open port\n");
                return -ENODEV;
        }
 
@@ -1275,7 +1267,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
        if (result < 0)
                goto err_out;
 
-       D1("Opening %d", serial->minor);
+       hso_dbg(0x1, "Opening %d\n", serial->minor);
 
        /* setup */
        tty->driver_data = serial;
@@ -1298,7 +1290,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
                        kref_get(&serial->parent->ref);
                }
        } else {
-               D1("Port was already open");
+               hso_dbg(0x1, "Port was already open\n");
        }
 
        usb_autopm_put_interface(serial->parent->interface);
@@ -1317,7 +1309,7 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp)
        struct hso_serial *serial = tty->driver_data;
        u8 usb_gone;
 
-       D1("Closing serial port");
+       hso_dbg(0x1, "Closing serial port\n");
 
        /* Open failed, no close cleanup required */
        if (serial == NULL)
@@ -1357,7 +1349,7 @@ static int hso_serial_write(struct tty_struct *tty, const unsigned char *buf,
 
        /* sanity check */
        if (serial == NULL) {
-               printk(KERN_ERR "%s: serial is NULL\n", __func__);
+               pr_err("%s: serial is NULL\n", __func__);
                return -ENODEV;
        }
 
@@ -1412,8 +1404,8 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old)
        unsigned long flags;
 
        if (old)
-               D5("Termios called with: cflags new[%d] - old[%d]",
-                  tty->termios.c_cflag, old->c_cflag);
+               hso_dbg(0x10, "Termios called with: cflags new[%d] - old[%d]\n",
+                       tty->termios.c_cflag, old->c_cflag);
 
        /* the actual setup */
        spin_lock_irqsave(&serial->serial_lock, flags);
@@ -1649,7 +1641,7 @@ static int hso_serial_tiocmget(struct tty_struct *tty)
 
        /* sanity check */
        if (!serial) {
-               D1("no tty structures");
+               hso_dbg(0x1, "no tty structures\n");
                return -EINVAL;
        }
        spin_lock_irq(&serial->serial_lock);
@@ -1682,7 +1674,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty,
 
        /* sanity check */
        if (!serial) {
-               D1("no tty structures");
+               hso_dbg(0x1, "no tty structures\n");
                return -EINVAL;
        }
 
@@ -1721,7 +1713,7 @@ static int hso_serial_ioctl(struct tty_struct *tty,
 {
        struct hso_serial *serial = tty->driver_data;
        int ret = 0;
-       D4("IOCTL cmd: %d, arg: %ld", cmd, arg);
+       hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg);
 
        if (!serial)
                return -ENODEV;
@@ -1783,7 +1775,7 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port,
 
        /* Sanity check */
        if (!serial || !ctrl_urb || !ctrl_req) {
-               printk(KERN_ERR "%s: Wrong arguments\n", __func__);
+               pr_err("%s: Wrong arguments\n", __func__);
                return -EINVAL;
        }
 
@@ -1808,9 +1800,9 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port,
                pipe = usb_sndctrlpipe(serial->parent->usb, 0);
        }
        /* syslog */
-       D2("%s command (%02x) len: %d, port: %d",
-          type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
-          ctrl_req->bRequestType, ctrl_req->wLength, port);
+       hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n",
+               type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
+               ctrl_req->bRequestType, ctrl_req->wLength, port);
 
        /* Load ctrl urb */
        ctrl_urb->transfer_flags = 0;
@@ -1876,11 +1868,11 @@ static void intr_callback(struct urb *urb)
                handle_usb_error(status, __func__, NULL);
                return;
        }
-       D4("\n--- Got intr callback 0x%02X ---", status);
+       hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status);
 
        /* what request? */
        port_req = urb->transfer_buffer;
-       D4(" port_req = 0x%.2X\n", *port_req);
+       hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req);
        /* loop over all muxed ports to find the one sending this */
        for (i = 0; i < 8; i++) {
                /* max 8 channels on MUX */
@@ -1888,7 +1880,8 @@ static void intr_callback(struct urb *urb)
                        serial = get_serial_by_shared_int_and_type(shared_int,
                                                                   (1 << i));
                        if (serial != NULL) {
-                               D1("Pending read interrupt on port %d\n", i);
+                               hso_dbg(0x1, "Pending read interrupt on port %d\n",
+                                       i);
                                spin_lock(&serial->serial_lock);
                                if (serial->rx_state == RX_IDLE &&
                                        serial->port.count > 0) {
@@ -1900,8 +1893,8 @@ static void intr_callback(struct urb *urb)
                                        } else
                                                serial->rx_state = RX_PENDING;
                                } else {
-                                       D1("Already a read pending on "
-                                          "port %d or port not open\n", i);
+                                       hso_dbg(0x1, "Already a read pending on port %d or port not open\n",
+                                               i);
                                }
                                spin_unlock(&serial->serial_lock);
                        }
@@ -1933,7 +1926,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
 
        /* sanity check */
        if (!serial) {
-               D1("serial == NULL");
+               hso_dbg(0x1, "serial == NULL\n");
                return;
        }
 
@@ -1948,7 +1941,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb)
        tty_port_tty_wakeup(&serial->port);
        hso_kick_transmit(serial);
 
-       D1(" ");
+       hso_dbg(0x1, "\n");
 }
 
 /* called for writing diag or CS serial port */
@@ -1996,8 +1989,8 @@ static void ctrl_callback(struct urb *urb)
 
        /* what request? */
        req = (struct usb_ctrlrequest *)(urb->setup_packet);
-       D4("\n--- Got muxed ctrl callback 0x%02X ---", status);
-       D4("Actual length of urb = %d\n", urb->actual_length);
+       hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status);
+       hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length);
        DUMP1(urb->transfer_buffer, urb->actual_length);
 
        if (req->bRequestType ==
@@ -2023,7 +2016,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 
        /* Sanity check */
        if (urb == NULL || serial == NULL) {
-               D1("serial = NULL");
+               hso_dbg(0x1, "serial = NULL\n");
                return -2;
        }
 
@@ -2035,7 +2028,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
        }
 
        /* Push data to tty */
-       D1("data to push to tty");
+       hso_dbg(0x1, "data to push to tty\n");
        count = tty_buffer_request_room(&serial->port, urb->actual_length);
        if (count >= urb->actual_length) {
                tty_insert_flip_string(&serial->port, urb->transfer_buffer,
@@ -2415,7 +2408,7 @@ static void hso_net_init(struct net_device *net)
 {
        struct hso_net *hso_net = netdev_priv(net);
 
-       D1("sizeof hso_net is %d", (int)sizeof(*hso_net));
+       hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net));
 
        /* fill in the other fields */
        net->netdev_ops = &hso_netdev_ops;
@@ -3229,7 +3222,7 @@ static int __init hso_init(void)
        int result;
 
        /* put it in the log */
-       printk(KERN_INFO "hso: %s\n", version);
+       pr_info("%s\n", version);
 
        /* Initialise the serial table semaphore and table */
        spin_lock_init(&serial_table_lock);
@@ -3260,16 +3253,15 @@ static int __init hso_init(void)
        /* register the tty driver */
        result = tty_register_driver(tty_drv);
        if (result) {
-               printk(KERN_ERR "%s - tty_register_driver failed(%d)\n",
-                       __func__, result);
+               pr_err("%s - tty_register_driver failed(%d)\n",
+                      __func__, result);
                goto err_free_tty;
        }
 
        /* register this module as an usb driver */
        result = usb_register(&hso_driver);
        if (result) {
-               printk(KERN_ERR "Could not register hso driver? error: %d\n",
-                       result);
+               pr_err("Could not register hso driver - error: %d\n", result);
                goto err_unreg_tty;
        }
 
@@ -3284,7 +3276,7 @@ err_free_tty:
 
 static void __exit hso_exit(void)
 {
-       printk(KERN_INFO "hso: unloaded\n");
+       pr_info("unloaded\n");
 
        tty_unregister_driver(tty_drv);
        put_tty_driver(tty_drv);
@@ -3301,7 +3293,7 @@ MODULE_DESCRIPTION(MOD_DESCRIPTION);
 MODULE_LICENSE(MOD_LICENSE);
 
 /* change the debug level (eg: insmod hso.ko debug=0x04) */
-MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
+MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
 module_param(debug, int, S_IRUGO | S_IWUSR);
 
 /* set the major tty number (eg: insmod hso.ko tty_major=245) */
index 770212b..66b34dd 100644 (file)
@@ -265,8 +265,6 @@ static int kaweth_control(struct kaweth_device *kaweth,
        struct usb_ctrlrequest *dr;
        int retval;
 
-       netdev_dbg(kaweth->net, "kaweth_control()\n");
-
        if(in_interrupt()) {
                netdev_dbg(kaweth->net, "in_interrupt()\n");
                return -EBUSY;
@@ -300,8 +298,6 @@ static int kaweth_read_configuration(struct kaweth_device *kaweth)
 {
        int retval;
 
-       netdev_dbg(kaweth->net, "Reading kaweth configuration\n");
-
        retval = kaweth_control(kaweth,
                                usb_rcvctrlpipe(kaweth->dev, 0),
                                KAWETH_COMMAND_GET_ETHERNET_DESC,
@@ -451,8 +447,6 @@ static int kaweth_trigger_firmware(struct kaweth_device *kaweth,
        kaweth->firmware_buf[6] = 0x00;
        kaweth->firmware_buf[7] = 0x00;
 
-       netdev_dbg(kaweth->net, "Triggering firmware\n");
-
        return kaweth_control(kaweth,
                              usb_sndctrlpipe(kaweth->dev, 0),
                              KAWETH_COMMAND_SCAN,
@@ -471,7 +465,6 @@ static int kaweth_reset(struct kaweth_device *kaweth)
 {
        int result;
 
-       netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth);
        result = usb_reset_configuration(kaweth->dev);
        mdelay(10);
 
@@ -685,8 +678,6 @@ static int kaweth_open(struct net_device *net)
        struct kaweth_device *kaweth = netdev_priv(net);
        int res;
 
-       netdev_dbg(kaweth->net, "Opening network device.\n");
-
        res = usb_autopm_get_interface(kaweth->intf);
        if (res) {
                dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n");
@@ -951,7 +942,6 @@ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message)
        struct kaweth_device *kaweth = usb_get_intfdata(intf);
        unsigned long flags;
 
-       dev_dbg(&intf->dev, "Suspending device\n");
        spin_lock_irqsave(&kaweth->device_lock, flags);
        kaweth->status |= KAWETH_STATUS_SUSPENDING;
        spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -968,7 +958,6 @@ static int kaweth_resume(struct usb_interface *intf)
        struct kaweth_device *kaweth = usb_get_intfdata(intf);
        unsigned long flags;
 
-       dev_dbg(&intf->dev, "Resuming device\n");
        spin_lock_irqsave(&kaweth->device_lock, flags);
        kaweth->status &= ~KAWETH_STATUS_SUSPENDING;
        spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -1009,6 +998,7 @@ static int kaweth_probe(
        struct net_device *netdev;
        const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
        int result = 0;
+       int rv = -EIO;
 
        dev_dbg(dev,
                "Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n",
@@ -1029,6 +1019,7 @@ static int kaweth_probe(
        kaweth = netdev_priv(netdev);
        kaweth->dev = udev;
        kaweth->net = netdev;
+       kaweth->intf = intf;
 
        spin_lock_init(&kaweth->device_lock);
        init_waitqueue_head(&kaweth->term_wait);
@@ -1048,6 +1039,10 @@ static int kaweth_probe(
                /* Download the firmware */
                dev_info(dev, "Downloading firmware...\n");
                kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL);
+               if (!kaweth->firmware_buf) {
+                       rv = -ENOMEM;
+                       goto err_free_netdev;
+               }
                if ((result = kaweth_download_firmware(kaweth,
                                                      "kaweth/new_code.bin",
                                                      100,
@@ -1139,8 +1134,6 @@ err_fw:
 
        dev_dbg(dev, "Initializing net device.\n");
 
-       kaweth->intf = intf;
-
        kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
        if (!kaweth->tx_urb)
                goto err_free_netdev;
@@ -1186,8 +1179,6 @@ err_fw:
        dev_info(dev, "kaweth interface created at %s\n",
                 kaweth->net->name);
 
-       dev_dbg(dev, "Kaweth probe returning.\n");
-
        return 0;
 
 err_intfdata:
@@ -1204,7 +1195,7 @@ err_only_tx:
 err_free_netdev:
        free_netdev(netdev);
 
-       return -EIO;
+       return rv;
 }
 
 /****************************************************************
@@ -1215,8 +1206,6 @@ static void kaweth_disconnect(struct usb_interface *intf)
        struct kaweth_device *kaweth = usb_get_intfdata(intf);
        struct net_device *netdev;
 
-       dev_info(&intf->dev, "Unregistering\n");
-
        usb_set_intfdata(intf, NULL);
        if (!kaweth) {
                dev_warn(&intf->dev, "unregistering non-existent device\n");
index 432b8a3..db558b8 100644 (file)
@@ -1179,7 +1179,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
  * NOTE:  annoying asymmetry:  if it's active, schedule_work() fails,
  * but tasklet_schedule() doesn't.     hope the failure is rare.
  */
-void lan78xx_defer_kevent(struct lan78xx_net *dev, int work)
+static void lan78xx_defer_kevent(struct lan78xx_net *dev, int work)
 {
        set_bit(work, &dev->flags);
        if (!schedule_delayed_work(&dev->wq, 0))
@@ -1406,7 +1406,7 @@ static u32 lan78xx_get_link(struct net_device *net)
        return net->phydev->link;
 }
 
-int lan78xx_nway_reset(struct net_device *net)
+static int lan78xx_nway_reset(struct net_device *net)
 {
        return phy_start_aneg(net->phydev);
 }
@@ -1997,7 +1997,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
        return 0;
 }
 
-int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
 {
        struct lan78xx_net *dev = netdev_priv(netdev);
        struct sockaddr *addr = p;
@@ -2371,7 +2371,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev)
        remove_wait_queue(&unlink_wakeup, &wait);
 }
 
-int lan78xx_stop(struct net_device *net)
+static int lan78xx_stop(struct net_device *net)
 {
        struct lan78xx_net              *dev = netdev_priv(net);
 
@@ -2533,7 +2533,8 @@ static void lan78xx_queue_skb(struct sk_buff_head *list,
        entry->state = state;
 }
 
-netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
+static netdev_tx_t
+lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
        struct lan78xx_net *dev = netdev_priv(net);
        struct sk_buff *skb2 = NULL;
@@ -2562,7 +2563,8 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        return NETDEV_TX_OK;
 }
 
-int lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
+static int
+lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
 {
        int tmp;
        struct usb_host_interface *alt = NULL;
@@ -2700,7 +2702,7 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev,
        }
 }
 
-void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
+static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
 {
        int             status;
 
@@ -3283,7 +3285,7 @@ static void lan78xx_disconnect(struct usb_interface *intf)
        usb_put_dev(udev);
 }
 
-void lan78xx_tx_timeout(struct net_device *net)
+static void lan78xx_tx_timeout(struct net_device *net)
 {
        struct lan78xx_net *dev = netdev_priv(net);
 
@@ -3603,7 +3605,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
        return 0;
 }
 
-int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
+static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
        struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
@@ -3699,7 +3701,7 @@ out:
        return ret;
 }
 
-int lan78xx_resume(struct usb_interface *intf)
+static int lan78xx_resume(struct usb_interface *intf)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
        struct sk_buff *skb;
@@ -3766,7 +3768,7 @@ int lan78xx_resume(struct usb_interface *intf)
        return 0;
 }
 
-int lan78xx_reset_resume(struct usb_interface *intf)
+static int lan78xx_reset_resume(struct usb_interface *intf)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
 
index 9bbe016..1434e5d 100644 (file)
@@ -1129,7 +1129,8 @@ static int pegasus_probe(struct usb_interface *intf,
                return -ENODEV;
 
        if (pegasus_count == 0) {
-               pegasus_workqueue = create_singlethread_workqueue("pegasus");
+               pegasus_workqueue = alloc_workqueue("pegasus", WQ_MEM_RECLAIM,
+                                                   0);
                if (!pegasus_workqueue)
                        return -ENOMEM;
        }
index f41a8ad..9338f58 100644 (file)
@@ -1076,8 +1076,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
                return -ENODEV;
        if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
                netif_warn(tp, probe, tp->netdev,
-                          "Invalid buffer when reading pass-thru MAC addr: "
-                          "(%d, %d)\n",
+                          "Invalid buffer for pass-thru MAC addr: (%d, %d)\n",
                           obj->type, obj->string.length);
                goto amacout;
        }
@@ -1090,8 +1089,8 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
        ret = hex2bin(buf, obj->string.pointer + 9, 6);
        if (!(ret == 0 && is_valid_ether_addr(buf))) {
                netif_warn(tp, probe, tp->netdev,
-                          "Invalid MAC when reading pass-thru MAC addr: "
-                          "%d, %pM\n", ret, buf);
+                          "Invalid MAC for pass-thru MAC addr: %d, %pM\n",
+                          ret, buf);
                ret = -EINVAL;
                goto amacout;
        }
@@ -1111,9 +1110,9 @@ static int set_ethernet_addr(struct r8152 *tp)
        struct sockaddr sa;
        int ret;
 
-       if (tp->version == RTL_VER_01)
+       if (tp->version == RTL_VER_01) {
                ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
-       else {
+       } else {
                /* if this is not an RTL8153-AD, no eFuse mac pass thru set,
                 * or system doesn't provide valid _SB.AMAC, this is
                 * expected to be non-zero
@@ -4032,7 +4031,7 @@ static int rtl8152_set_coalesce(struct net_device *netdev,
        return ret;
 }
 
-static struct ethtool_ops ops = {
+static const struct ethtool_ops ops = {
        .get_drvinfo = rtl8152_get_drvinfo,
        .get_settings = rtl8152_get_settings,
        .set_settings = rtl8152_set_settings,
index dc989a8..831aa33 100644 (file)
@@ -33,7 +33,7 @@
 #include "smsc95xx.h"
 
 #define SMSC_CHIPNAME                  "smsc95xx"
-#define SMSC_DRIVER_VERSION            "1.0.4"
+#define SMSC_DRIVER_VERSION            "1.0.5"
 #define HS_USB_PKT_SIZE                        (512)
 #define FS_USB_PKT_SIZE                        (64)
 #define DEFAULT_HS_BURST_CAP_SIZE      (16 * 1024 + 5 * HS_USB_PKT_SIZE)
@@ -64,6 +64,7 @@
 #define CARRIER_CHECK_DELAY (2 * HZ)
 
 struct smsc95xx_priv {
+       u32 chip_id;
        u32 mac_cr;
        u32 hash_hi;
        u32 hash_lo;
@@ -71,6 +72,7 @@ struct smsc95xx_priv {
        spinlock_t mac_cr_lock;
        u8 features;
        u8 suspend_flags;
+       u8 mdix_ctrl;
        bool link_ok;
        struct delayed_work carrier_check;
        struct usbnet *dev;
@@ -782,14 +784,113 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net,
        return ret;
 }
 
+static int get_mdix_status(struct net_device *net)
+{
+       struct usbnet *dev = netdev_priv(net);
+       u32 val;
+       int buf;
+
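+       /* when the AMDIX override bit is set the PHY register reports the
+        * MDI-X mode, otherwise fall back to the strap status
+        */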
+       buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS);
+       if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) {
+               if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_)
+                       return ETH_TP_MDI_AUTO;
+               else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_)
+                       return ETH_TP_MDI_X;
+       } else {
+               buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val);
+               if (val & STRAP_STATUS_AMDIX_EN_)
+                       return ETH_TP_MDI_AUTO;
+       }
+
+       return ETH_TP_MDI;
+}
+
+static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
+{
+       struct usbnet *dev = netdev_priv(net);
+       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+       int buf;
+
+       if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) ||
+           (pdata->chip_id == ID_REV_CHIP_ID_9530_) ||
+           (pdata->chip_id == ID_REV_CHIP_ID_89530_) ||
+           (pdata->chip_id == ID_REV_CHIP_ID_9730_)) {
+               /* Extend Manual AutoMDIX timer for 9500A/9500Ai */
+               buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+                                        PHY_EDPD_CONFIG);
+               buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
+               smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+                                   PHY_EDPD_CONFIG, buf);
+       }
+
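+       /* force MDI, force MDI-X, or re-enable auto crossover as requested */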
+       if (mdix_ctrl == ETH_TP_MDI) {
+               buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+                                        SPECIAL_CTRL_STS);
+               buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+               buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+                        SPECIAL_CTRL_STS_AMDIX_STATE_);
+               smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+                                   SPECIAL_CTRL_STS, buf);
+       } else if (mdix_ctrl == ETH_TP_MDI_X) {
+               buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+                                        SPECIAL_CTRL_STS);
+               buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+               buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+                        SPECIAL_CTRL_STS_AMDIX_STATE_);
+               buf |= SPECIAL_CTRL_STS_AMDIX_STATE_;
+               smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+                                   SPECIAL_CTRL_STS, buf);
+       } else if (mdix_ctrl == ETH_TP_MDI_AUTO) {
+               buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+                                        SPECIAL_CTRL_STS);
+               buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+               buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+                        SPECIAL_CTRL_STS_AMDIX_STATE_);
+               buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_;
+               smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+                                   SPECIAL_CTRL_STS, buf);
+       }
+       pdata->mdix_ctrl = mdix_ctrl;
+}
+
+static int smsc95xx_get_settings(struct net_device *net,
+                                struct ethtool_cmd *cmd)
+{
+       struct usbnet *dev = netdev_priv(net);
+       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+       int retval;
+
+       retval = usbnet_get_settings(net, cmd);
+
+       cmd->eth_tp_mdix = pdata->mdix_ctrl;
+       cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl;
+
+       return retval;
+}
+
+static int smsc95xx_set_settings(struct net_device *net,
+                                struct ethtool_cmd *cmd)
+{
+       struct usbnet *dev = netdev_priv(net);
+       struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+       int retval;
+
+       if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl)
+               set_mdix_status(net, cmd->eth_tp_mdix_ctrl);
+
+       retval = usbnet_set_settings(net, cmd);
+
+       return retval;
+}
+
 static const struct ethtool_ops smsc95xx_ethtool_ops = {
        .get_link       = usbnet_get_link,
        .nway_reset     = usbnet_nway_reset,
        .get_drvinfo    = usbnet_get_drvinfo,
        .get_msglevel   = usbnet_get_msglevel,
        .set_msglevel   = usbnet_set_msglevel,
-       .get_settings   = usbnet_get_settings,
-       .set_settings   = usbnet_set_settings,
+       .get_settings   = smsc95xx_get_settings,
+       .set_settings   = smsc95xx_set_settings,
        .get_eeprom_len = smsc95xx_ethtool_get_eeprom_len,
        .get_eeprom     = smsc95xx_ethtool_get_eeprom,
        .set_eeprom     = smsc95xx_ethtool_set_eeprom,
@@ -1194,6 +1295,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
        if (ret < 0)
                return ret;
        val >>= 16;
+       pdata->chip_id = val;
+       pdata->mdix_ctrl = get_mdix_status(dev->net);
 
        if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) ||
            (val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_))
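
For reference, the eth_tp_mdix_ctrl plumbing added above is driven from userspace through the legacy ETHTOOL_GSET/ETHTOOL_SSET ioctls. A minimal sketch of such a caller follows; the helper name is invented and error handling is trimmed:

    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    /* Hypothetical userspace helper, not part of this patch: request a
     * fixed MDI/MDI-X setting. The eth_tp_mdix_ctrl value written here
     * is what smsc95xx_set_settings() ultimately receives.
     */
    static int set_mdix(int sock, const char *ifname, __u8 mdix_ctrl)
    {
            struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
            struct ifreq ifr;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
            ifr.ifr_data = (char *)&ecmd;

            if (ioctl(sock, SIOCETHTOOL, &ifr) < 0)
                    return -1;

            ecmd.cmd = ETHTOOL_SSET;
            ecmd.eth_tp_mdix_ctrl = mdix_ctrl;      /* e.g. ETH_TP_MDI_X */
            return ioctl(sock, SIOCETHTOOL, &ifr);
    }

Any datagram socket, e.g. socket(AF_INET, SOCK_DGRAM, 0), works as the sock argument.
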
index 526faa0..29a4d9e 100644 (file)
 
 #define BURST_CAP                      (0x38)
 
+#define        STRAP_STATUS                    (0x3C)
+#define        STRAP_STATUS_PWR_SEL_           (0x00000020)
+#define        STRAP_STATUS_AMDIX_EN_          (0x00000010)
+#define        STRAP_STATUS_PORT_SWAP_         (0x00000008)
+#define        STRAP_STATUS_EEP_SIZE_          (0x00000004)
+#define        STRAP_STATUS_RMT_WKP_           (0x00000002)
+#define        STRAP_STATUS_EEP_DISABLE_       (0x00000001)
+
 #define GPIO_WAKE                      (0x64)
 
 #define INT_EP_CTL                     (0x68)
index f37a6e6..fbc853e 100644 (file)
@@ -313,7 +313,7 @@ static const struct net_device_ops veth_netdev_ops = {
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
-                      NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \
+                      NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
                       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
                       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
                       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
@@ -340,6 +340,7 @@ static void veth_setup(struct net_device *dev)
 
        dev->hw_features = VETH_FEATURES;
        dev->hw_enc_features = VETH_FEATURES;
+       dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
 }
 
 /*
index c68fe49..2fd93b4 100644 (file)
@@ -914,7 +914,9 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 {
        struct Vmxnet3_TxDataDesc *tdd;
 
-       tdd = tq->data_ring.base + tq->tx_ring.next2fill;
+       tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base +
+                                           tq->tx_ring.next2fill *
+                                           tq->txdata_desc_size);
 
        memcpy(tdd->data, skb->data, ctx->copy_size);
        netdev_dbg(adapter->netdev,
@@ -1639,7 +1641,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
        }
 }
 
-void
+static void
 vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
 {
        int i;
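
The byte-based pointer arithmetic in the vmxnet3_copy_hdr() hunk above is needed because the tx data-ring element size is now the runtime, per-queue txdata_desc_size rather than sizeof(struct Vmxnet3_TxDataDesc); arithmetic on the typed pointer would stride by the struct size and land on the wrong descriptor. The idiom in isolation (a sketch, not driver code):

    /* Index into a ring whose element size is only known at runtime.
     * (u8 *) arithmetic steps in bytes; arithmetic on a typed pointer
     * would step in sizeof(*elem) units instead.
     */
    static inline void *ring_elem(void *base, unsigned int idx,
                                  size_t elem_size)
    {
            return (u8 *)base + (size_t)idx * elem_size;
    }
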
index 74fc030..7dc37a0 100644 (file)
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.9.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.a.0-k"
 
 /* a 32-bit int, each byte encodes a version number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040900
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040a00
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
index 1ce7420..55674b0 100644 (file)
@@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static int vrf_ip6_local_out(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       int err;
+
+       err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
+                     sk, skb, NULL, skb_dst(skb)->dev, dst_output);
+
+       if (likely(err == 1))
+               err = dst_output(net, sk, skb);
+
+       return err;
+}
+
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
                                           struct net_device *dev)
 {
@@ -151,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
                .flowlabel = ip6_flowinfo(iph),
                .flowi6_mark = skb->mark,
                .flowi6_proto = iph->nexthdr,
-               .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+               .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
        };
        int ret = NET_XMIT_DROP;
        struct dst_entry *dst;
@@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
        /* strip the ethernet header added to pass the packet through the VRF device */
        __skb_pull(skb, skb_network_offset(skb));
 
-       ret = ip6_local_out(net, skb->sk, skb);
+       ret = vrf_ip6_local_out(net, skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                dev->stats.tx_errors++;
        else
@@ -227,6 +241,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 }
 #endif
 
+/* based on ip_local_out; we can't use it because the dst is switched to point at us */
+static int vrf_ip_local_out(struct net *net, struct sock *sk,
+                           struct sk_buff *skb)
+{
+       int err;
+
+       err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, skb_dst(skb)->dev, dst_output);
+       if (likely(err == 1))
+               err = dst_output(net, sk, skb);
+
+       return err;
+}
+
 static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                           struct net_device *vrf_dev)
 {
@@ -237,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                .flowi4_oif = vrf_dev->ifindex,
                .flowi4_iif = LOOPBACK_IFINDEX,
                .flowi4_tos = RT_TOS(ip4h->tos),
-               .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
-                               FLOWI_FLAG_SKIP_NH_OIF,
+               .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
                .daddr = ip4h->daddr,
        };
        struct net *net = dev_net(vrf_dev);
@@ -292,7 +319,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                               RT_SCOPE_LINK);
        }
 
-       ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+       ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                vrf_dev->stats.tx_errors++;
        else
@@ -377,6 +404,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+/* Set the dst on the skb to send the packet back to us via the dev_xmit
+ * path. This allows it to go through device-based features such as qdisc,
+ * netfilter hooks and packet sockets with skb->dev set to the VRF device.
+ */
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       struct net_vrf *vrf = netdev_priv(vrf_dev);
+       struct dst_entry *dst = NULL;
+       struct rt6_info *rt6;
+
+       /* don't divert link scope packets */
+       if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+               return skb;
+
+       rcu_read_lock();
+
+       rt6 = rcu_dereference(vrf->rt6);
+       if (likely(rt6)) {
+               dst = &rt6->dst;
+               dst_hold(dst);
+       }
+
+       rcu_read_unlock();
+
+       if (unlikely(!dst)) {
+               vrf_tx_error(vrf_dev, skb);
+               return NULL;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return skb;
+}
+
 /* holding rtnl */
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -463,6 +527,13 @@ out:
        return rc;
 }
 #else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+                                  struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       return skb;
+}
+
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
 }
@@ -531,6 +602,55 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
+/* Set the dst on the skb to send the packet back to us via the dev_xmit
+ * path. This allows it to go through device-based features such as qdisc,
+ * netfilter hooks and packet sockets with skb->dev set to the VRF device.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb)
+{
+       struct net_vrf *vrf = netdev_priv(vrf_dev);
+       struct dst_entry *dst = NULL;
+       struct rtable *rth;
+
+       rcu_read_lock();
+
+       rth = rcu_dereference(vrf->rth);
+       if (likely(rth)) {
+               dst = &rth->dst;
+               dst_hold(dst);
+       }
+
+       rcu_read_unlock();
+
+       if (unlikely(!dst)) {
+               vrf_tx_error(vrf_dev, skb);
+               return NULL;
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb,
+                                 u16 proto)
+{
+       switch (proto) {
+       case AF_INET:
+               return vrf_ip_out(vrf_dev, sk, skb);
+       case AF_INET6:
+               return vrf_ip6_out(vrf_dev, sk, skb);
+       }
+
+       return skb;
+}
+
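
The vrf_l3_out() dispatcher above is the driver half of the new l3mdev_l3_out hook; the core side is expected to call it from the IPv4/IPv6 local-out paths through a wrapper along these lines (a sketch modeled on the existing net/l3mdev.h helper style; the exact naming there may differ):

    /* Sketch: hand a locally generated skb to the master VRF device so
     * it can swap in its own dst (vrf_ip_out()/vrf_ip6_out() above).
     * Runs under rcu_read_lock.
     */
    static inline struct sk_buff *l3mdev_l3_out(struct sock *sk,
                                                struct sk_buff *skb, u16 proto)
    {
            struct net_device *dev = skb_dst(skb)->dev;

            if (netif_is_l3_slave(dev)) {
                    struct net_device *master;

                    master = netdev_master_upper_dev_get_rcu(dev);
                    if (master && master->l3mdev_ops->l3mdev_l3_out)
                            skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
                                                                    skb, proto);
            }
            return skb;
    }
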
 /* holding rtnl */
 static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -722,63 +842,6 @@ static u32 vrf_fib_table(const struct net_device *dev)
        return vrf->tb_id;
 }
 
-static struct rtable *vrf_get_rtable(const struct net_device *dev,
-                                    const struct flowi4 *fl4)
-{
-       struct rtable *rth = NULL;
-
-       if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-               struct net_vrf *vrf = netdev_priv(dev);
-
-               rcu_read_lock();
-
-               rth = rcu_dereference(vrf->rth);
-               if (likely(rth))
-                       dst_hold(&rth->dst);
-
-               rcu_read_unlock();
-       }
-
-       return rth;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
-{
-       struct fib_result res = { .tclassid = 0 };
-       struct net *net = dev_net(dev);
-       u32 orig_tos = fl4->flowi4_tos;
-       u8 flags = fl4->flowi4_flags;
-       u8 scope = fl4->flowi4_scope;
-       u8 tos = RT_FL_TOS(fl4);
-       int rc;
-
-       if (unlikely(!fl4->daddr))
-               return 0;
-
-       fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-       fl4->flowi4_iif = LOOPBACK_IFINDEX;
-       /* make sure oif is set to VRF device for lookup */
-       fl4->flowi4_oif = dev->ifindex;
-       fl4->flowi4_tos = tos & IPTOS_RT_MASK;
-       fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
-                            RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
-
-       rc = fib_lookup(net, fl4, &res, 0);
-       if (!rc) {
-               if (res.type == RTN_LOCAL)
-                       fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
-               else
-                       fib_select_path(net, &res, fl4, -1);
-       }
-
-       fl4->flowi4_flags = flags;
-       fl4->flowi4_tos = orig_tos;
-       fl4->flowi4_scope = scope;
-
-       return rc;
-}
-
 static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        return 0;
@@ -970,106 +1033,44 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
-                                        struct flowi6 *fl6)
+/* Send to a link-local or multicast address via an interface enslaved
+ * to the VRF device. Force the lookup to the VRF table without changing
+ * the flow struct.
+ */
+static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
+                                             struct flowi6 *fl6)
 {
-       bool need_strict = rt6_need_strict(&fl6->daddr);
-       struct net_vrf *vrf = netdev_priv(dev);
        struct net *net = dev_net(dev);
+       int flags = RT6_LOOKUP_F_IFACE;
        struct dst_entry *dst = NULL;
        struct rt6_info *rt;
 
-       /* send to link-local or multicast address */
-       if (need_strict) {
-               int flags = RT6_LOOKUP_F_IFACE;
-
-               /* VRF device does not have a link-local address and
-                * sending packets to link-local or mcast addresses over
-                * a VRF device does not make sense
-                */
-               if (fl6->flowi6_oif == dev->ifindex) {
-                       struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
-
-                       dst_hold(dst);
-                       return dst;
-               }
-
-               if (!ipv6_addr_any(&fl6->saddr))
-                       flags |= RT6_LOOKUP_F_HAS_SADDR;
-
-               rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
-               if (rt)
-                       dst = &rt->dst;
-
-       } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-
-               rcu_read_lock();
-
-               rt = rcu_dereference(vrf->rt6);
-               if (likely(rt)) {
-                       dst = &rt->dst;
-                       dst_hold(dst);
-               }
-
-               rcu_read_unlock();
+       /* VRF device does not have a link-local address and
+        * sending packets to link-local or mcast addresses over
+        * a VRF device does not make sense
+        */
+       if (fl6->flowi6_oif == dev->ifindex) {
+               dst = &net->ipv6.ip6_null_entry->dst;
+               dst_hold(dst);
+               return dst;
        }
 
-       /* make sure oif is set to VRF device for lookup */
-       if (!need_strict)
-               fl6->flowi6_oif = dev->ifindex;
-
-       return dst;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
-                         struct flowi6 *fl6)
-{
-       struct net *net = dev_net(dev);
-       struct dst_entry *dst;
-       struct rt6_info *rt;
-       int err;
-
-       if (rt6_need_strict(&fl6->daddr)) {
-               rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
-                                         RT6_LOOKUP_F_IFACE);
-               if (unlikely(!rt))
-                       return 0;
+       if (!ipv6_addr_any(&fl6->saddr))
+               flags |= RT6_LOOKUP_F_HAS_SADDR;
 
+       rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+       if (rt)
                dst = &rt->dst;
-       } else {
-               __u8 flags = fl6->flowi6_flags;
 
-               fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
-               fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-
-               dst = ip6_route_output(net, sk, fl6);
-               rt = (struct rt6_info *)dst;
-
-               fl6->flowi6_flags = flags;
-       }
-
-       err = dst->error;
-       if (!err) {
-               err = ip6_route_get_saddr(net, rt, &fl6->daddr,
-                                         sk ? inet6_sk(sk)->srcprefs : 0,
-                                         &fl6->saddr);
-       }
-
-       dst_release(dst);
-
-       return err;
+       return dst;
 }
 #endif
 
 static const struct l3mdev_ops vrf_l3mdev_ops = {
        .l3mdev_fib_table       = vrf_fib_table,
-       .l3mdev_get_rtable      = vrf_get_rtable,
-       .l3mdev_get_saddr       = vrf_get_saddr,
        .l3mdev_l3_rcv          = vrf_l3_rcv,
+       .l3mdev_l3_out          = vrf_l3_out,
 #if IS_ENABLED(CONFIG_IPV6)
-       .l3mdev_get_rt6_dst     = vrf_get_rt6_dst,
-       .l3mdev_get_saddr6      = vrf_get_saddr6,
+       .l3mdev_link_scope_lookup = vrf_link_scope_lookup,
 #endif
 };
 
index c0dda6f..e7d1668 100644 (file)
@@ -27,7 +27,6 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
-#include <net/protocol.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_tunnel.h>
@@ -288,7 +287,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
        if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
            nla_put_s32(skb, NDA_LINK_NETNSID,
-                       peernet2id_alloc(dev_net(vxlan->dev), vxlan->net)))
+                       peernet2id(dev_net(vxlan->dev), vxlan->net)))
                goto nla_put_failure;
 
        if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@ -861,20 +860,20 @@ out:
 /* Dump forwarding table */
 static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                          struct net_device *dev,
-                         struct net_device *filter_dev, int idx)
+                         struct net_device *filter_dev, int *idx)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
        unsigned int h;
+       int err = 0;
 
        for (h = 0; h < FDB_HASH_SIZE; ++h) {
                struct vxlan_fdb *f;
-               int err;
 
                hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
                        struct vxlan_rdst *rd;
 
                        list_for_each_entry_rcu(rd, &f->remotes, list) {
-                               if (idx < cb->args[0])
+                               if (*idx < cb->args[2])
                                        goto skip;
 
                                err = vxlan_fdb_info(skb, vxlan, f,
@@ -882,17 +881,15 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGH,
                                                     NLM_F_MULTI, rd);
-                               if (err < 0) {
-                                       cb->args[1] = err;
+                               if (err < 0)
                                        goto out;
-                               }
 skip:
-                               ++idx;
+                               *idx += 1;
                        }
                }
        }
 out:
-       return idx;
+       return err;
 }
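
The switch from a returned index to an int *idx out-parameter tracks a matching change in the core fdb dump path: rtnl_fdb_dump() now owns the running counter, and a truncated multi-part dump resumes from cb->args[2]. Roughly, on the caller side (a sketch; the rtnetlink change itself is not part of this hunk):

    /* Sketch of the new convention: a negative return aborts the dump,
     * and *idx survives in cb->args[2] so the next netlink page resumes
     * where this one stopped instead of every driver re-counting.
     */
    static int fdb_dump_dev(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *dev, int *idx)
    {
            const struct net_device_ops *ops = dev->netdev_ops;

            if (ops->ndo_fdb_dump)
                    return ops->ndo_fdb_dump(skb, cb, dev, NULL, idx);
            return ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
    }
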
 
 /* Watch incoming packets to learn mapping between Ethernet address
@@ -1294,7 +1291,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
                struct metadata_dst *tun_dst;
 
                tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
-                                        vxlan_vni_to_tun_id(vni), sizeof(*md));
+                                        key32_to_tunnel_id(vni), sizeof(*md));
 
                if (!tun_dst)
                        goto drop;
@@ -1948,7 +1945,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        goto drop;
                }
                dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
-               vni = vxlan_tun_id_to_vni(info->key.tun_id);
+               vni = tunnel_id_to_key32(info->key.tun_id);
                remote_ip.sa.sa_family = ip_tunnel_info_af(info);
                if (remote_ip.sa.sa_family == AF_INET) {
                        remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
@@ -2106,6 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                      vni, md, flags, udp_sum);
                if (err < 0) {
                        dst_release(ndst);
+                       dev->stats.tx_errors++;
                        return;
                }
                udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
@@ -2782,14 +2780,15 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
        struct net_device *lowerdev = NULL;
 
        if (conf->flags & VXLAN_F_GPE) {
-               if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
-                       return -EINVAL;
                /* For now, allow GPE only together with COLLECT_METADATA.
                 * This can be relaxed later; in that case, the other side
                 * of the PtP link will have to be provided.
                 */
-               if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+               if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+                   !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+                       pr_info("unsupported combination of extensions\n");
                        return -EINVAL;
+               }
 
                vxlan_raw_setup(dev);
        } else {
@@ -2842,6 +2841,9 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
                        dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
 
                needed_headroom = lowerdev->hard_header_len;
+       } else if (vxlan_addr_multicast(&dst->remote_ip)) {
+               pr_info("multicast destination requires interface to be specified\n");
+               return -EINVAL;
        }
 
        if (conf->mtu) {
@@ -2874,8 +2876,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
                     tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
                    tmp->cfg.dst_port == vxlan->cfg.dst_port &&
                    (tmp->flags & VXLAN_F_RCV_FLAGS) ==
-                   (vxlan->flags & VXLAN_F_RCV_FLAGS))
-               return -EEXIST;
+                   (vxlan->flags & VXLAN_F_RCV_FLAGS)) {
+                       pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni));
+                       return -EEXIST;
+               }
        }
 
        dev->ethtool_ops = &vxlan_ethtool_ops;
@@ -2909,7 +2913,6 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[])
 {
        struct vxlan_config conf;
-       int err;
 
        memset(&conf, 0, sizeof(conf));
 
@@ -3018,26 +3021,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
        if (tb[IFLA_MTU])
                conf.mtu = nla_get_u32(tb[IFLA_MTU]);
 
-       err = vxlan_dev_configure(src_net, dev, &conf);
-       switch (err) {
-       case -ENODEV:
-               pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
-               break;
-
-       case -EPERM:
-               pr_info("IPv6 is disabled via sysctl\n");
-               break;
-
-       case -EEXIST:
-               pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
-               break;
-
-       case -EINVAL:
-               pr_info("unsupported combination of extensions\n");
-               break;
-       }
-
-       return err;
+       return vxlan_dev_configure(src_net, dev, &conf);
 }
 
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
index 6f04445..5fbf83d 100644 (file)
@@ -162,7 +162,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
                                ALIGNMENT_OF_UCC_HDLC_PRAM);
 
        if (priv->ucc_pram_offset < 0) {
-               dev_err(priv->dev, "Can not allocate MURAM for hdlc prameter.\n");
+               dev_err(priv->dev, "Cannot allocate MURAM for hdlc parameter.\n");
                ret = -ENOMEM;
                goto free_tx_bd;
        }
index d98c7e5..3a421ca 100644 (file)
@@ -582,8 +582,8 @@ handle_channel( struct net_device  *dev )
 
 
 /*
- * Routine returns 1 if it need to acknoweledge received frame.
- * Empty frame received without errors won't be acknoweledged.
+ * Routine returns 1 if it needs to acknowledge received frame.
+ * Empty frame received without errors won't be acknowledged.
  */
 
 static int
index 201a980..748eaa6 100644 (file)
@@ -4527,7 +4527,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
                                (u8 *)&settings->beacon.head[ie_offset],
                                settings->beacon.head_len - ie_offset,
                                WLAN_EID_SSID);
-               if (!ssid_ie)
+               if (!ssid_ie || ssid_ie->len > IEEE80211_MAX_SSID_LEN)
                        return -EINVAL;
 
                memcpy(ssid_le.SSID, ssid_ie->data, ssid_ie->len);
@@ -5635,7 +5635,7 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp,
                  ifevent->action, ifevent->flags, ifevent->ifidx,
                  ifevent->bsscfgidx);
 
-       mutex_lock(&event->vif_event_lock);
+       spin_lock(&event->vif_event_lock);
        event->action = ifevent->action;
        vif = event->vif;
 
@@ -5643,7 +5643,7 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp,
        case BRCMF_E_IF_ADD:
                /* waiting process may have timed out */
                if (!cfg->vif_event.vif) {
-                       mutex_unlock(&event->vif_event_lock);
+                       spin_unlock(&event->vif_event_lock);
                        return -EBADF;
                }
 
@@ -5654,24 +5654,24 @@ static s32 brcmf_notify_vif_event(struct brcmf_if *ifp,
                        ifp->ndev->ieee80211_ptr = &vif->wdev;
                        SET_NETDEV_DEV(ifp->ndev, wiphy_dev(cfg->wiphy));
                }
-               mutex_unlock(&event->vif_event_lock);
+               spin_unlock(&event->vif_event_lock);
                wake_up(&event->vif_wq);
                return 0;
 
        case BRCMF_E_IF_DEL:
-               mutex_unlock(&event->vif_event_lock);
+               spin_unlock(&event->vif_event_lock);
                /* the event may not be the result of a user request */
                if (brcmf_cfg80211_vif_event_armed(cfg))
                        wake_up(&event->vif_wq);
                return 0;
 
        case BRCMF_E_IF_CHANGE:
-               mutex_unlock(&event->vif_event_lock);
+               spin_unlock(&event->vif_event_lock);
                wake_up(&event->vif_wq);
                return 0;
 
        default:
-               mutex_unlock(&event->vif_event_lock);
+               spin_unlock(&event->vif_event_lock);
                break;
        }
        return -EINVAL;
@@ -5792,7 +5792,7 @@ static void wl_deinit_priv(struct brcmf_cfg80211_info *cfg)
 static void init_vif_event(struct brcmf_cfg80211_vif_event *event)
 {
        init_waitqueue_head(&event->vif_wq);
-       mutex_init(&event->vif_event_lock);
+       spin_lock_init(&event->vif_event_lock);
 }
 
 static s32 brcmf_dongle_roam(struct brcmf_if *ifp)
@@ -6691,9 +6691,9 @@ static inline bool vif_event_equals(struct brcmf_cfg80211_vif_event *event,
 {
        u8 evt_action;
 
-       mutex_lock(&event->vif_event_lock);
+       spin_lock(&event->vif_event_lock);
        evt_action = event->action;
-       mutex_unlock(&event->vif_event_lock);
+       spin_unlock(&event->vif_event_lock);
        return evt_action == action;
 }
 
@@ -6702,10 +6702,10 @@ void brcmf_cfg80211_arm_vif_event(struct brcmf_cfg80211_info *cfg,
 {
        struct brcmf_cfg80211_vif_event *event = &cfg->vif_event;
 
-       mutex_lock(&event->vif_event_lock);
+       spin_lock(&event->vif_event_lock);
        event->vif = vif;
        event->action = 0;
-       mutex_unlock(&event->vif_event_lock);
+       spin_unlock(&event->vif_event_lock);
 }
 
 bool brcmf_cfg80211_vif_event_armed(struct brcmf_cfg80211_info *cfg)
@@ -6713,9 +6713,9 @@ bool brcmf_cfg80211_vif_event_armed(struct brcmf_cfg80211_info *cfg)
        struct brcmf_cfg80211_vif_event *event = &cfg->vif_event;
        bool armed;
 
-       mutex_lock(&event->vif_event_lock);
+       spin_lock(&event->vif_event_lock);
        armed = event->vif != NULL;
-       mutex_unlock(&event->vif_event_lock);
+       spin_unlock(&event->vif_event_lock);
 
        return armed;
 }
index 7d77f86..8889832 100644 (file)
@@ -227,7 +227,7 @@ struct escan_info {
  */
 struct brcmf_cfg80211_vif_event {
        wait_queue_head_t vif_wq;
-       struct mutex vif_event_lock;
+       spinlock_t vif_event_lock;
        u8 action;
        struct brcmf_cfg80211_vif *vif;
 };
index 8d16f02..65e8c87 100644 (file)
@@ -743,7 +743,7 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx,
                 * serious, troublesome side effects. The p2p module will clean
                 * up the ifp if needed.
                 */
-               brcmf_p2p_ifp_removed(ifp);
+               brcmf_p2p_ifp_removed(ifp, rtnl_locked);
                kfree(ifp);
        }
 }
index 66f942f..de19c7c 100644 (file)
@@ -2297,7 +2297,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
        return err;
 }
 
-void brcmf_p2p_ifp_removed(struct brcmf_if *ifp)
+void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked)
 {
        struct brcmf_cfg80211_info *cfg;
        struct brcmf_cfg80211_vif *vif;
@@ -2306,9 +2306,11 @@ void brcmf_p2p_ifp_removed(struct brcmf_if *ifp)
        vif = ifp->vif;
        cfg = wdev_to_cfg(&vif->wdev);
        cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif = NULL;
-       rtnl_lock();
+       if (!rtnl_locked)
+               rtnl_lock();
        cfg80211_unregister_wdev(&vif->wdev);
-       rtnl_unlock();
+       if (!rtnl_locked)
+               rtnl_unlock();
        brcmf_free_vif(vif);
 }
 
index a3bd18c..8ce9447 100644 (file)
@@ -155,7 +155,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
 int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev);
 int brcmf_p2p_ifchange(struct brcmf_cfg80211_info *cfg,
                       enum brcmf_fil_p2p_if_types if_type);
-void brcmf_p2p_ifp_removed(struct brcmf_if *ifp);
+void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked);
 int brcmf_p2p_start_device(struct wiphy *wiphy, struct wireless_dev *wdev);
 void brcmf_p2p_stop_device(struct wiphy *wiphy, struct wireless_dev *wdev);
 int brcmf_p2p_scan_prep(struct wiphy *wiphy,
index 726ede3..3bba521 100644 (file)
@@ -1320,7 +1320,7 @@ struct il_priv {
        u64 timestamp;
 
        union {
-#if defined(CONFIG_IWL3945) || defined(CONFIG_IWL3945_MODULE)
+#if IS_ENABLED(CONFIG_IWL3945)
                struct {
                        void *shared_virt;
                        dma_addr_t shared_phys;
@@ -1351,7 +1351,7 @@ struct il_priv {
 
                } _3945;
 #endif
-#if defined(CONFIG_IWL4965) || defined(CONFIG_IWL4965_MODULE)
+#if IS_ENABLED(CONFIG_IWL4965)
                struct {
                        struct il_rx_phy_res last_phy_res;
                        bool last_phy_res_valid;
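
IS_ENABLED(CONFIG_FOO) from <linux/kconfig.h> evaluates to 1 when the option is built in or modular, which is exactly what the replaced defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) pair spelled out by hand. It also works in ordinary C expressions, letting the compiler discard dead code while still type-checking it; for instance (helper name invented for illustration):

    if (IS_ENABLED(CONFIG_IWL3945))
            il3945_setup(il);       /* call eliminated when =n, but the
                                     * declaration must stay visible */
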
index 1abcabb..46b52bf 100644 (file)
@@ -960,5 +960,6 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
        }
 
        mvm->fw_dbg_conf = conf_id;
-       return ret;
+
+       return 0;
 }
index f7dff76..e9f1be9 100644 (file)
@@ -105,7 +105,8 @@ iwl_fw_dbg_trigger_vif_match(struct iwl_fw_dbg_trigger_tlv *trig,
 {
        u32 trig_vif = le32_to_cpu(trig->vif_type);
 
-       return trig_vif == IWL_FW_DBG_CONF_VIF_ANY || vif->type == trig_vif;
+       return trig_vif == IWL_FW_DBG_CONF_VIF_ANY ||
+              ieee80211_vif_type_p2p(vif) == trig_vif;
 }
 
 static inline bool
index f5290c4..9506e65 100644 (file)
@@ -632,6 +632,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
        hw->wiphy->features |= NL80211_FEATURE_P2P_GO_CTWIN |
                               NL80211_FEATURE_LOW_PRIORITY_SCAN |
                               NL80211_FEATURE_P2P_GO_OPPPS |
+                              NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE |
                               NL80211_FEATURE_DYNAMIC_SMPS |
                               NL80211_FEATURE_STATIC_SMPS |
                               NL80211_FEATURE_SUPPORTS_WMM_ADMISSION;
index 1806495..2e30990 100644 (file)
@@ -467,6 +467,8 @@ struct iwl_mvm_vif {
 static inline struct iwl_mvm_vif *
 iwl_mvm_vif_from_mac80211(struct ieee80211_vif *vif)
 {
+       if (!vif)
+               return NULL;
        return (void *)vif->drv_priv;
 }
 
index 84d6cbd..3a56268 100644 (file)
@@ -412,4 +412,8 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
 
 void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb);
 
+#ifdef CONFIG_DEBUG_FS
+void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m);
+#endif
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
index fb87cb3..e8c5ddd 100644 (file)
 #include <linux/vmalloc.h>
 #include <linux/rculist.h>
 
-static void xenvif_del_hash(struct rcu_head *rcu)
-{
-       struct xenvif_hash_cache_entry *entry;
-
-       entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu);
-
-       kfree(entry);
-}
-
 static void xenvif_add_hash(struct xenvif *vif, const u8 *tag,
                            unsigned int len, u32 val)
 {
@@ -76,7 +67,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag,
                if (++vif->hash.cache.count > xenvif_hash_cache_size) {
                        list_del_rcu(&oldest->link);
                        vif->hash.cache.count--;
-                       call_rcu(&oldest->rcu, xenvif_del_hash);
+                       kfree_rcu(oldest, rcu);
                }
        }
 
@@ -114,7 +105,7 @@ static void xenvif_flush_hash(struct xenvif *vif)
        list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) {
                list_del_rcu(&entry->link);
                vif->hash.cache.count--;
-               call_rcu(&entry->rcu, xenvif_del_hash);
+               kfree_rcu(entry, rcu);
        }
 
        spin_unlock_irqrestore(&vif->hash.cache.lock, flags);
@@ -369,6 +360,74 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
        return XEN_NETIF_CTRL_STATUS_SUCCESS;
 }
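
kfree_rcu() absorbs call_rcu() callbacks whose only job is kfree(), such as the xenvif_del_hash() helper removed above; it only requires that the rcu_head live inside the object being freed so the container offset is known at compile time. The general shape, with a sketched struct standing in for the driver's private hash-cache entry:

    struct cache_entry {                    /* sketch of the pattern */
            struct list_head link;
            struct rcu_head rcu;            /* must be a member of the object */
            u32 val;
    };

    static void drop_entry(struct cache_entry *entry)
    {
            list_del_rcu(&entry->link);
            /* frees the containing cache_entry after a grace period */
            kfree_rcu(entry, rcu);
    }
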
 
+#ifdef CONFIG_DEBUG_FS
+void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m)
+{
+       unsigned int i;
+
+       switch (vif->hash.alg) {
+       case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ:
+               seq_puts(m, "Hash Algorithm: TOEPLITZ\n");
+               break;
+
+       case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE:
+               seq_puts(m, "Hash Algorithm: NONE\n");
+               /* FALLTHRU */
+       default:
+               return;
+       }
+
+       if (vif->hash.flags) {
+               seq_puts(m, "\nHash Flags:\n");
+
+               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4)
+                       seq_puts(m, "- IPv4\n");
+               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
+                       seq_puts(m, "- IPv4 + TCP\n");
+               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6)
+                       seq_puts(m, "- IPv6\n");
+               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
+                       seq_puts(m, "- IPv6 + TCP\n");
+       }
+
+       seq_puts(m, "\nHash Key:\n");
+
+       for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) {
+               unsigned int j, n;
+
+               n = 8;
+               if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE)
+                       n = XEN_NETBK_MAX_HASH_KEY_SIZE - i;
+
+               seq_printf(m, "[%2u - %2u]: ", i, i + n - 1);
+
+               for (j = 0; j < n; j++, i++)
+                       seq_printf(m, "%02x ", vif->hash.key[i]);
+
+               seq_puts(m, "\n");
+       }
+
+       if (vif->hash.size != 0) {
+               seq_puts(m, "\nHash Mapping:\n");
+
+               for (i = 0; i < vif->hash.size; ) {
+                       unsigned int j, n;
+
+                       n = 8;
+                       if (i + n >= vif->hash.size)
+                               n = vif->hash.size - i;
+
+                       seq_printf(m, "[%4u - %4u]: ", i, i + n - 1);
+
+                       for (j = 0; j < n; j++, i++)
+                               seq_printf(m, "%4u ", vif->hash.mapping[i]);
+
+                       seq_puts(m, "\n");
+               }
+       }
+}
+#endif /* CONFIG_DEBUG_FS */
+
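
Given the seq_printf() formats above, reading the new debugfs file for a Toeplitz-configured vif yields output of roughly this shape (values synthetic, key and mapping rows truncated):

    Hash Algorithm: TOEPLITZ

    Hash Flags:
    - IPv4
    - IPv4 + TCP

    Hash Key:
    [ 0 -  7]: 6d 5a 56 da 25 5b 0e c2

    Hash Mapping:
    [   0 -    7]:    0    1    2    3    0    1    2    3
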
 void xenvif_init_hash(struct xenvif *vif)
 {
        if (xenvif_hash_cache_size == 0)
index 6a31f26..bacf6e0 100644 (file)
@@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
        return count;
 }
 
-static int xenvif_dump_open(struct inode *inode, struct file *filp)
+static int xenvif_io_ring_open(struct inode *inode, struct file *filp)
 {
        int ret;
        void *queue = NULL;
@@ -179,13 +179,35 @@ static int xenvif_dump_open(struct inode *inode, struct file *filp)
 
 static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
        .owner = THIS_MODULE,
-       .open = xenvif_dump_open,
+       .open = xenvif_io_ring_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
        .write = xenvif_write_io_ring,
 };
 
+static int xenvif_read_ctrl(struct seq_file *m, void *v)
+{
+       struct xenvif *vif = m->private;
+
+       xenvif_dump_hash_info(vif, m);
+
+       return 0;
+}
+
+static int xenvif_ctrl_open(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, xenvif_read_ctrl, inode->i_private);
+}
+
+static const struct file_operations xenvif_dbg_ctrl_ops_fops = {
+       .owner = THIS_MODULE,
+       .open = xenvif_ctrl_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
 static void xenvif_debugfs_addif(struct xenvif *vif)
 {
        struct dentry *pfile;
@@ -210,6 +232,17 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
                                pr_warn("Creation of io_ring file returned %ld!\n",
                                        PTR_ERR(pfile));
                }
+
+               if (vif->ctrl_task) {
+                       pfile = debugfs_create_file("ctrl",
+                                                   S_IRUSR,
+                                                   vif->xenvif_dbg_root,
+                                                   vif,
+                                                   &xenvif_dbg_ctrl_ops_fops);
+                       if (IS_ERR_OR_NULL(pfile))
+                               pr_warn("Creation of ctrl file returned %ld!\n",
+                                       PTR_ERR(pfile));
+               }
        } else
                netdev_warn(vif->dev,
                            "Creation of vif debugfs dir returned %ld!\n",
index 458daf9..935866f 100644 (file)
@@ -185,8 +185,12 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
                return -ENXIO;
 
        nd_desc = nvdimm_bus->nd_desc;
+       /*
+        * If the bus provides no ndctl method, this is PMEM_LEGACY and
+        * we just pretend everything is handled.
+        */
        if (!nd_desc->ndctl)
-               return -ENXIO;
+               return len;
 
        memset(&ars_cap, 0, sizeof(ars_cap));
        ars_cap.address = phys;
index db39d53..f7d37a6 100644 (file)
@@ -30,8 +30,8 @@ config NVME_FABRICS
 
 config NVME_RDMA
        tristate "NVM Express over Fabrics RDMA host driver"
-       depends on INFINIBAND
-       depends on BLK_DEV_NVME
+       depends on INFINIBAND && BLOCK
+       select NVME_CORE
        select NVME_FABRICS
        select SG_POOL
        help
index 7ff2e82..2feacc7 100644 (file)
@@ -81,10 +81,12 @@ EXPORT_SYMBOL_GPL(nvme_cancel_request);
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                enum nvme_ctrl_state new_state)
 {
-       enum nvme_ctrl_state old_state = ctrl->state;
+       enum nvme_ctrl_state old_state;
        bool changed = false;
 
        spin_lock_irq(&ctrl->lock);
+
+       old_state = ctrl->state;
        switch (new_state) {
        case NVME_CTRL_LIVE:
                switch (old_state) {
@@ -140,11 +142,12 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        default:
                break;
        }
-       spin_unlock_irq(&ctrl->lock);
 
        if (changed)
                ctrl->state = new_state;
 
+       spin_unlock_irq(&ctrl->lock);
+
        return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
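
Taking both the old_state read and the new-state store inside ctrl->lock closes a small window: with the read done before locking, two concurrent callers could validate against the same stale state and both report a successful transition. Timeline of the race being fixed:

    /*
     * With the old code (two CPUs):
     *
     *   CPU0: old_state = ctrl->state;        // reads LIVE
     *   CPU1: old_state = ctrl->state;        // also reads LIVE
     *   CPU0: lock; LIVE -> RESETTING ok; ctrl->state = RESETTING; unlock;
     *   CPU1: lock; validates against stale LIVE; "succeeds" incorrectly
     */
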
@@ -608,7 +611,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
 
        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
                        NVME_QID_ANY, 0, 0);
-       if (ret >= 0)
+       if (ret >= 0 && result)
                *result = le32_to_cpu(cqe.result);
        return ret;
 }
@@ -628,7 +631,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
 
        ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
                        NVME_QID_ANY, 0, 0);
-       if (ret >= 0)
+       if (ret >= 0 && result)
                *result = le32_to_cpu(cqe.result);
        return ret;
 }
index dc99676..4eff491 100644 (file)
@@ -47,8 +47,10 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
        mutex_lock(&nvmf_hosts_mutex);
        host = __nvmf_host_find(hostnqn);
-       if (host)
+       if (host) {
+               kref_get(&host->ref);
                goto out_unlock;
+       }
 
        host = kmalloc(sizeof(*host), GFP_KERNEL);
        if (!host)
@@ -56,7 +58,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
 
        kref_init(&host->ref);
        memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-       uuid_le_gen(&host->id);
+       uuid_be_gen(&host->id);
 
        list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -73,9 +75,9 @@ static struct nvmf_host *nvmf_host_default(void)
                return NULL;
 
        kref_init(&host->ref);
-       uuid_le_gen(&host->id);
+       uuid_be_gen(&host->id);
        snprintf(host->nqn, NVMF_NQN_SIZE,
-               "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUl", &host->id);
+               "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
        mutex_lock(&nvmf_hosts_mutex);
        list_add_tail(&host->list, &nvmf_hosts);
@@ -363,7 +365,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
        cmd.connect.opcode = nvme_fabrics_command;
        cmd.connect.fctype = nvme_fabrics_type_connect;
        cmd.connect.qid = 0;
-       cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+
+       /*
+        * The fabrics spec requires a minimum queue depth of 32 for the
+        * admin queue, so always connect with that depth until there is
+        * a reason to do otherwise.
+        */
+       cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+
        /*
         * Set keep-alive timeout in seconds granularity (ms * 1000)
         * and add a grace period for controller kato enforcement
@@ -375,7 +384,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
        if (!data)
                return -ENOMEM;
 
-       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le));
+       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
        data->cntlid = cpu_to_le16(0xffff);
        strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
        strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -434,7 +443,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
        if (!data)
                return -ENOMEM;
 
-       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le));
+       memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
        data->cntlid = cpu_to_le16(ctrl->cntlid);
        strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
        strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
index 89df52c..46e460a 100644 (file)
@@ -34,7 +34,7 @@ struct nvmf_host {
        struct kref             ref;
        struct list_head        list;
        char                    nqn[NVMF_NQN_SIZE];
-       uuid_le                 id;
+       uuid_be                 id;
 };
 
 /**
index 8d2875b..ab545fb 100644 (file)
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS  1
 
-#define NVME_RDMA_MAX_PAGES_PER_MR     512
-
-#define NVME_RDMA_DEF_RECONNECT_DELAY  20
-
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -77,7 +73,6 @@ struct nvme_rdma_request {
        u32                     num_sge;
        int                     nents;
        bool                    inline_data;
-       bool                    need_inval;
        struct ib_reg_wr        reg_wr;
        struct ib_cqe           reg_cqe;
        struct nvme_rdma_queue  *queue;
@@ -286,7 +281,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
        int ret = 0;
 
-       if (!req->need_inval)
+       if (!req->mr->need_inval)
                goto out;
 
        ib_dereg_mr(req->mr);
@@ -298,7 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
                req->mr = NULL;
        }
 
-       req->need_inval = false;
+       req->mr->need_inval = false;
 
 out:
        return ret;
@@ -645,7 +640,8 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
        int i, ret;
 
        for (i = 1; i < ctrl->queue_count; i++) {
-               ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize);
+               ret = nvme_rdma_init_queue(ctrl, i,
+                                          ctrl->ctrl.opts->queue_size);
                if (ret) {
                        dev_info(ctrl->ctrl.device,
                                "failed to initialize i/o queue: %d\n", ret);
@@ -849,7 +845,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
        if (!blk_rq_bytes(rq))
                return;
 
-       if (req->need_inval) {
+       if (req->mr->need_inval) {
                res = nvme_rdma_inv_rkey(queue, req);
                if (res < 0) {
                        dev_err(ctrl->ctrl.device,
@@ -935,7 +931,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
                             IB_ACCESS_REMOTE_READ |
                             IB_ACCESS_REMOTE_WRITE;
 
-       req->need_inval = true;
+       req->mr->need_inval = true;
 
        sg->addr = cpu_to_le64(req->mr->iova);
        put_unaligned_le24(req->mr->length, sg->length);
@@ -958,7 +954,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
        req->num_sge = 1;
        req->inline_data = false;
-       req->need_inval = false;
+       req->mr->need_inval = false;
 
        c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1145,7 +1141,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 
        if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
            wc->ex.invalidate_rkey == req->mr->rkey)
-               req->need_inval = false;
+               req->mr->need_inval = false;
 
        blk_mq_complete_request(rq, status);
 
@@ -1278,8 +1274,22 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 
        priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
        priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
-       priv.hrqsize = cpu_to_le16(queue->queue_size);
-       priv.hsqsize = cpu_to_le16(queue->queue_size);
+       /*
+        * set the admin queue depth to the minimum size
+        * specified by the Fabrics standard.
+        */
+       if (priv.qid == 0) {
+               priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
+               priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+       } else {
+               /*
+                * The current interpretation of the fabrics spec is
+                * that hrqsize must be at least sqsize + 1, i.e. the
+                * 1's-based representation of sqsize.
+                */
+               priv.hrqsize = cpu_to_le16(queue->queue_size);
+               priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
+       }
 
        ret = rdma_connect(queue->cm_id, &param);
        if (ret) {
@@ -1319,7 +1329,7 @@ out_destroy_queue_ib:
 static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
 {
        struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-       int ret;
+       int ret = 0;
 
        /* Own the controller deletion */
        if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
@@ -1461,7 +1471,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
                flush = true;
        ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-                       req->need_inval ? &req->reg_wr.wr : NULL, flush);
+                       req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
        if (ret) {
                nvme_rdma_unmap_data(queue, rq);
                goto err;
@@ -1816,7 +1826,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
 
        memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
        ctrl->tag_set.ops = &nvme_rdma_mq_ops;
-       ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
+       ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
        ctrl->tag_set.reserved_tags = 1; /* fabric connect */
        ctrl->tag_set.numa_node = NUMA_NO_NODE;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1914,7 +1924,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        spin_lock_init(&ctrl->lock);
 
        ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
-       ctrl->ctrl.sqsize = opts->queue_size;
+       ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
 
        ret = -ENOMEM;
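
The "- 1" here (and in the matching nvme-loop hunk below) reflects NVMe's 0's-based queue-size encoding: a queue with N usable entries is carried on the wire as N - 1, while blk-mq bookkeeping stays 1's-based. Condensed (a sketch putting the two values side by side):

    u16 entries = opts->queue_size;         /* e.g. 128 usable slots */

    ctrl->ctrl.sqsize = entries - 1;        /* 0's-based wire value: 127 */
    ctrl->tag_set.queue_depth = entries;    /* 1's-based blk-mq depth: 128 */
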
index a5c31cb..3a5b9d0 100644 (file)
@@ -15,8 +15,8 @@ config NVME_TARGET
 
 config NVME_TARGET_LOOP
        tristate "NVMe loopback device support"
-       depends on BLK_DEV_NVME
        depends on NVME_TARGET
+       select NVME_CORE
        select NVME_FABRICS
        select SG_POOL
        help
index 7affd40..395e60d 100644 (file)
@@ -556,7 +556,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 
        memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
        ctrl->tag_set.ops = &nvme_loop_mq_ops;
-       ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
+       ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
        ctrl->tag_set.reserved_tags = 1; /* fabric connect */
        ctrl->tag_set.numa_node = NUMA_NO_NODE;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -620,7 +620,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 
        ret = -ENOMEM;
 
-       ctrl->ctrl.sqsize = opts->queue_size;
+       ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
 
        ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
index b4d6485..1cbe6e0 100644 (file)
@@ -978,10 +978,11 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
                container_of(w, struct nvmet_rdma_queue, release_work);
        struct rdma_cm_id *cm_id = queue->cm_id;
        struct nvmet_rdma_device *dev = queue->dev;
+       enum nvmet_rdma_queue_state state = queue->state;
 
        nvmet_rdma_free_queue(queue);
 
-       if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+       if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
                rdma_destroy_id(cm_id);
 
        kref_put(&dev->ref, nvmet_rdma_free_dev);
@@ -1003,10 +1004,10 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
        queue->host_qid = le16_to_cpu(req->qid);
 
        /*
-        * req->hsqsize corresponds to our recv queue size
+        * req->hsqsize corresponds to our recv queue size plus 1
         * req->hrqsize corresponds to our send queue size
         */
-       queue->recv_queue_size = le16_to_cpu(req->hsqsize);
+       queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
        queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
        if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
index 7792266..3ce6953 100644 (file)
@@ -1631,8 +1631,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np,
         */
 
  err:
-       if (it.node)
-               of_node_put(it.node);
+       of_node_put(it.node);
        return rc;
 }
 
@@ -2343,20 +2342,13 @@ struct device_node *of_graph_get_endpoint_by_regs(
        const struct device_node *parent, int port_reg, int reg)
 {
        struct of_endpoint endpoint;
-       struct device_node *node, *prev_node = NULL;
-
-       while (1) {
-               node = of_graph_get_next_endpoint(parent, prev_node);
-               of_node_put(prev_node);
-               if (!node)
-                       break;
+       struct device_node *node = NULL;
 
+       for_each_endpoint_of_node(parent, node) {
                of_graph_parse_endpoint(node, &endpoint);
                if (((port_reg == -1) || (endpoint.port == port_reg)) &&
                        ((reg == -1) || (endpoint.id == reg)))
                        return node;
-
-               prev_node = node;
        }
 
        return NULL;
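
for_each_endpoint_of_node() hides the put-previous/get-next reference dance the removed loop performed by hand, and breaking out of the walk early (the return above) leaves the caller holding exactly one reference on the match. The idiom in isolation (predicate name invented):

    struct device_node *node;

    for_each_endpoint_of_node(parent, node) {
            if (endpoint_matches(node))     /* hypothetical test */
                    return node;            /* caller must of_node_put() it */
    }
    return NULL;                            /* iterator dropped all refs */
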
index 55f1b83..085c638 100644 (file)
@@ -517,7 +517,7 @@ static void *__unflatten_device_tree(const void *blob,
                pr_warning("End of tree marker overwritten: %08x\n",
                           be32_to_cpup(mem + size));
 
-       if (detached) {
+       if (detached && mynodes) {
                of_node_set_flag(*mynodes, OF_DETACHED);
                pr_debug("unflattened tree is detached\n");
        }
index 89a71c6..a2e68f7 100644 (file)
@@ -544,12 +544,15 @@ void __init of_irq_init(const struct of_device_id *matches)
 
                        list_del(&desc->list);
 
+                       of_node_set_flag(desc->dev, OF_POPULATED);
+
                        pr_debug("of_irq_init: init %s (%p), parent %p\n",
                                 desc->dev->full_name,
                                 desc->dev, desc->interrupt_parent);
                        ret = desc->irq_init_cb(desc->dev,
                                                desc->interrupt_parent);
                        if (ret) {
+                               of_node_clear_flag(desc->dev, OF_POPULATED);
                                kfree(desc);
                                continue;
                        }
@@ -559,8 +562,6 @@ void __init of_irq_init(const struct of_device_id *matches)
                         * its children can get processed in a subsequent pass.
                         */
                        list_add_tail(&desc->list, &intc_parent_list);
-
-                       of_node_set_flag(desc->dev, OF_POPULATED);
                }
 
                /* Get the next pending parent that might have children */
index 8aa1976..f39ccd5 100644 (file)
@@ -497,6 +497,7 @@ int of_platform_default_populate(struct device_node *root,
 }
 EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
+#ifndef CONFIG_PPC
 static int __init of_platform_default_populate_init(void)
 {
        struct device_node *node;
@@ -521,6 +522,7 @@ static int __init of_platform_default_populate_init(void)
        return 0;
 }
 arch_initcall_sync(of_platform_default_populate_init);
+#endif
 
 static int of_platform_device_destroy(struct device *dev, void *data)
 {
index 5f4a2e0..add6623 100644 (file)
@@ -44,6 +44,7 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
        bridge->release_fn = release_fn;
        bridge->release_data = release_data;
 }
+EXPORT_SYMBOL_GPL(pci_set_host_bridge_release);
 
 void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
                             struct resource *res)
index eafa613..98f1222 100644 (file)
@@ -1069,7 +1069,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
                nvec = maxvec;
 
        for (;;) {
-               if (!(flags & PCI_IRQ_NOAFFINITY)) {
+               if (flags & PCI_IRQ_AFFINITY) {
                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
@@ -1105,7 +1105,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
  **/
 int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 {
-       return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
+       return __pci_enable_msi_range(dev, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msi_range);
 
@@ -1120,7 +1120,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
                return -ERANGE;
 
        for (;;) {
-               if (!(flags & PCI_IRQ_NOAFFINITY)) {
+               if (flags & PCI_IRQ_AFFINITY) {
                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
                        if (nvec < minvec)
                                return -ENOSPC;
@@ -1160,8 +1160,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
                int minvec, int maxvec)
 {
-       return __pci_enable_msix_range(dev, entries, minvec, maxvec,
-                       PCI_IRQ_NOAFFINITY);
+       return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
@@ -1187,22 +1186,25 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 {
        int vecs = -ENOSPC;
 
-       if (!(flags & PCI_IRQ_NOMSIX)) {
+       if (flags & PCI_IRQ_MSIX) {
                vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
                                flags);
                if (vecs > 0)
                        return vecs;
        }
 
-       if (!(flags & PCI_IRQ_NOMSI)) {
+       if (flags & PCI_IRQ_MSI) {
                vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
                if (vecs > 0)
                        return vecs;
        }
 
        /* use legacy irq if allowed */
-       if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1)
+       if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) {
+               pci_intx(dev, 1);
                return 1;
+       }
+
        return vecs;
 }
 EXPORT_SYMBOL(pci_alloc_irq_vectors);
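Note the flag polarity flips here: callers now opt in with PCI_IRQ_MSIX, PCI_IRQ_MSI and PCI_IRQ_LEGACY rather than opting out with the old PCI_IRQ_NO* flags, and the legacy fallback now explicitly re-enables INTx via pci_intx(). A minimal caller sketch (foo_setup_irqs() and the vector counts are hypothetical, not from this series):

    #include <linux/pci.h>

    /* Hypothetical probe fragment: request 1..8 vectors, trying MSI-X
     * first, then MSI, then the legacy INTx line. */
    static int foo_setup_irqs(struct pci_dev *pdev)
    {
            int nvec = pci_alloc_irq_vectors(pdev, 1, 8,
                            PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);

            if (nvec < 0)
                    return nvec;    /* no interrupt type could be enabled */
            return nvec;            /* number of vectors actually granted */
    }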
index 37ff015..44e0ff3 100644 (file)
@@ -3327,9 +3327,9 @@ static void quirk_apple_wait_for_thunderbolt(struct pci_dev *dev)
        if (nhi->vendor != PCI_VENDOR_ID_INTEL
                    || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
                        nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
+                       nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI &&
                        nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI)
-                   || nhi->subsystem_vendor != 0x2222
-                   || nhi->subsystem_device != 0x1111)
+                   || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8)
                goto out;
        dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n");
        device_pm_wait_for_dev(&dev->dev, &nhi->dev);
@@ -3343,6 +3343,9 @@ DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
                               quirk_apple_wait_for_thunderbolt);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
+                              PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE,
+                              quirk_apple_wait_for_thunderbolt);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
                               PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE,
                               quirk_apple_wait_for_thunderbolt);
index c494613..f5e1008 100644 (file)
@@ -925,6 +925,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
                        if (i > 0 && spi != using_spi) {
                                pr_err("PPI/SPI IRQ type mismatch for %s!\n",
                                        dn->name);
+                               of_node_put(dn);
                                kfree(irqs);
                                return -EINVAL;
                        }
@@ -969,7 +970,7 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
        if (cpumask_weight(&pmu->supported_cpus) == 0) {
                int irq = platform_get_irq(pdev, 0);
 
-               if (irq_is_percpu(irq)) {
+               if (irq >= 0 && irq_is_percpu(irq)) {
                        /* If using PPIs, check the affinity of the partition */
                        int ret;
 
index 18d6626..8ffc44a 100644 (file)
@@ -367,7 +367,7 @@ static int brcm_sata_phy_init(struct phy *phy)
                rc = -ENODEV;
        };
 
-       return 0;
+       return rc;
 }
 
 static const struct phy_ops phy_ops = {
index 0a45bc6..8c7eb33 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/power_supply.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/usb/of.h>
 #include <linux/workqueue.h>
 
 #define REG_ISCR                       0x00
@@ -110,6 +111,7 @@ struct sun4i_usb_phy_cfg {
 struct sun4i_usb_phy_data {
        void __iomem *base;
        const struct sun4i_usb_phy_cfg *cfg;
+       enum usb_dr_mode dr_mode;
        struct mutex mutex;
        struct sun4i_usb_phy {
                struct phy *phy;
@@ -120,6 +122,7 @@ struct sun4i_usb_phy_data {
                bool regulator_on;
                int index;
        } phys[MAX_PHYS];
+       int first_phy;
        /* phy0 / otg related variables */
        struct extcon_dev *extcon;
        bool phy0_init;
@@ -285,16 +288,10 @@ static int sun4i_usb_phy_init(struct phy *_phy)
                sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_DPDM_PULLUP_EN);
                sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_ID_PULLUP_EN);
 
-               if (data->id_det_gpio) {
-                       /* OTG mode, force ISCR and cable state updates */
-                       data->id_det = -1;
-                       data->vbus_det = -1;
-                       queue_delayed_work(system_wq, &data->detect, 0);
-               } else {
-                       /* Host only mode */
-                       sun4i_usb_phy0_set_id_detect(_phy, 0);
-                       sun4i_usb_phy0_set_vbus_detect(_phy, 1);
-               }
+               /* Force ISCR and cable state updates */
+               data->id_det = -1;
+               data->vbus_det = -1;
+               queue_delayed_work(system_wq, &data->detect, 0);
        }
 
        return 0;
@@ -319,6 +316,19 @@ static int sun4i_usb_phy_exit(struct phy *_phy)
        return 0;
 }
 
+static int sun4i_usb_phy0_get_id_det(struct sun4i_usb_phy_data *data)
+{
+       switch (data->dr_mode) {
+       case USB_DR_MODE_OTG:
+               return gpiod_get_value_cansleep(data->id_det_gpio);
+       case USB_DR_MODE_HOST:
+               return 0;
+       case USB_DR_MODE_PERIPHERAL:
+       default:
+               return 1;
+       }
+}
+
 static int sun4i_usb_phy0_get_vbus_det(struct sun4i_usb_phy_data *data)
 {
        if (data->vbus_det_gpio)
@@ -432,7 +442,10 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work)
        struct phy *phy0 = data->phys[0].phy;
        int id_det, vbus_det, id_notify = 0, vbus_notify = 0;
 
-       id_det = gpiod_get_value_cansleep(data->id_det_gpio);
+       if (phy0 == NULL)
+               return;
+
+       id_det = sun4i_usb_phy0_get_id_det(data);
        vbus_det = sun4i_usb_phy0_get_vbus_det(data);
 
        mutex_lock(&phy0->mutex);
@@ -448,7 +461,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work)
                 * without vbus detection report vbus low for long enough for
                 * the musb-ip to end the current device session.
                 */
-               if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) {
+               if (data->dr_mode == USB_DR_MODE_OTG &&
+                   !sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) {
                        sun4i_usb_phy0_set_vbus_detect(phy0, 0);
                        msleep(200);
                        sun4i_usb_phy0_set_vbus_detect(phy0, 1);
@@ -474,7 +488,8 @@ static void sun4i_usb_phy0_id_vbus_det_scan(struct work_struct *work)
                 * without vbus detection report vbus low for long enough for
                 * the musb-ip to end the current host session.
                 */
-               if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) {
+               if (data->dr_mode == USB_DR_MODE_OTG &&
+                   !sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) {
                        mutex_lock(&phy0->mutex);
                        sun4i_usb_phy0_set_vbus_detect(phy0, 0);
                        msleep(1000);
@@ -519,7 +534,8 @@ static struct phy *sun4i_usb_phy_xlate(struct device *dev,
 {
        struct sun4i_usb_phy_data *data = dev_get_drvdata(dev);
 
-       if (args->args[0] >= data->cfg->num_phys)
+       if (args->args[0] < data->first_phy ||
+           args->args[0] >= data->cfg->num_phys)
                return ERR_PTR(-ENODEV);
 
        return data->phys[args->args[0]].phy;
@@ -593,13 +609,17 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
                        return -EPROBE_DEFER;
        }
 
-       /* vbus_det without id_det makes no sense, and is not supported */
-       if (sun4i_usb_phy0_have_vbus_det(data) && !data->id_det_gpio) {
-               dev_err(dev, "usb0_id_det missing or invalid\n");
-               return -ENODEV;
-       }
-
-       if (data->id_det_gpio) {
+       data->dr_mode = of_usb_get_dr_mode_by_phy(np, 0);
+       switch (data->dr_mode) {
+       case USB_DR_MODE_OTG:
+               /* otg without id_det makes no sense, and is not supported */
+               if (!data->id_det_gpio) {
+                       dev_err(dev, "usb0_id_det missing or invalid\n");
+                       return -ENODEV;
+               }
+               /* fall through */
+       case USB_DR_MODE_HOST:
+       case USB_DR_MODE_PERIPHERAL:
                data->extcon = devm_extcon_dev_allocate(dev,
                                                        sun4i_usb_phy0_cable);
                if (IS_ERR(data->extcon))
@@ -610,9 +630,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
                        dev_err(dev, "failed to register extcon: %d\n", ret);
                        return ret;
                }
+               break;
+       default:
+               dev_info(dev, "dr_mode unknown, not registering usb phy0\n");
+               data->first_phy = 1;
        }
 
-       for (i = 0; i < data->cfg->num_phys; i++) {
+       for (i = data->first_phy; i < data->cfg->num_phys; i++) {
                struct sun4i_usb_phy *phy = data->phys + i;
                char name[16];
 
index ac4f31a..28fce4b 100644 (file)
@@ -141,9 +141,9 @@ static int sun9i_usb_phy_probe(struct platform_device *pdev)
                }
 
                phy->hsic_clk = devm_clk_get(dev, "hsic_12M");
-               if (IS_ERR(phy->clk)) {
+               if (IS_ERR(phy->hsic_clk)) {
                        dev_err(dev, "failed to get hsic_12M clock\n");
-                       return PTR_ERR(phy->clk);
+                       return PTR_ERR(phy->hsic_clk);
                }
 
                phy->reset = devm_reset_control_get(dev, "hsic");
index 5749a4e..0fe8fad 100644 (file)
@@ -1539,12 +1539,11 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                offset += range->npins;
        }
 
-       /* Mask and clear all interrupts */
-       chv_writel(0, pctrl->regs + CHV_INTMASK);
+       /* Clear all interrupts */
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
        ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
-                                  handle_simple_irq, IRQ_TYPE_NONE);
+                                  handle_bad_irq, IRQ_TYPE_NONE);
        if (ret) {
                dev_err(pctrl->dev, "failed to add IRQ chip\n");
                goto fail;
index 7bad200..55375b1 100644 (file)
@@ -809,17 +809,17 @@ static const struct pistachio_pin_group pistachio_groups[] = {
                           PADS_FUNCTION_SELECT2, 12, 0x3),
        MFIO_MUX_PIN_GROUP(83, MIPS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
                           PADS_FUNCTION_SELECT2, 14, 0x3),
-       MFIO_MUX_PIN_GROUP(84, SYS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
+       MFIO_MUX_PIN_GROUP(84, AUDIO_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
                           PADS_FUNCTION_SELECT2, 16, 0x3),
-       MFIO_MUX_PIN_GROUP(85, WIFI_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+       MFIO_MUX_PIN_GROUP(85, RPU_V_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
                           PADS_FUNCTION_SELECT2, 18, 0x3),
-       MFIO_MUX_PIN_GROUP(86, BT_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+       MFIO_MUX_PIN_GROUP(86, RPU_L_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
                           PADS_FUNCTION_SELECT2, 20, 0x3),
-       MFIO_MUX_PIN_GROUP(87, RPU_V_PLL_LOCK, DREQ2, SOCIF_DEBUG,
+       MFIO_MUX_PIN_GROUP(87, SYS_PLL_LOCK, DREQ2, SOCIF_DEBUG,
                           PADS_FUNCTION_SELECT2, 22, 0x3),
-       MFIO_MUX_PIN_GROUP(88, RPU_L_PLL_LOCK, DREQ3, SOCIF_DEBUG,
+       MFIO_MUX_PIN_GROUP(88, WIFI_PLL_LOCK, DREQ3, SOCIF_DEBUG,
                           PADS_FUNCTION_SELECT2, 24, 0x3),
-       MFIO_MUX_PIN_GROUP(89, AUDIO_PLL_LOCK, DREQ4, DREQ5,
+       MFIO_MUX_PIN_GROUP(89, BT_PLL_LOCK, DREQ4, DREQ5,
                           PADS_FUNCTION_SELECT2, 26, 0x3),
        PIN_GROUP(TCK, "tck"),
        PIN_GROUP(TRSTN, "trstn"),
index ce483b0..f9d661e 100644 (file)
@@ -485,12 +485,12 @@ static const struct sunxi_desc_pin sun8i_a23_pins[] = {
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
-                 SUNXI_FUNCTION(0x2, "uart2"),         /* RTS */
+                 SUNXI_FUNCTION(0x2, "uart1"),         /* RTS */
                  SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 8)),  /* PG_EINT8 */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
-                 SUNXI_FUNCTION(0x2, "uart2"),         /* CTS */
+                 SUNXI_FUNCTION(0x2, "uart1"),         /* CTS */
                  SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 9)),  /* PG_EINT9 */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
index 3040abe..3131cac 100644 (file)
@@ -407,12 +407,12 @@ static const struct sunxi_desc_pin sun8i_a33_pins[] = {
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
-                 SUNXI_FUNCTION(0x2, "uart2"),         /* RTS */
+                 SUNXI_FUNCTION(0x2, "uart1"),         /* RTS */
                  SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 8)),  /* PG_EINT8 */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
                  SUNXI_FUNCTION(0x1, "gpio_out"),
-                 SUNXI_FUNCTION(0x2, "uart2"),         /* CTS */
+                 SUNXI_FUNCTION(0x2, "uart1"),         /* CTS */
                  SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 9)),  /* PG_EINT9 */
        SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
                  SUNXI_FUNCTION(0x0, "gpio_in"),
index f99b183..374a802 100644 (file)
@@ -1,6 +1,8 @@
 /*
  * Generic driver for the OLPC Embedded Controller.
  *
+ * Author: Andres Salomon <dilinger@queued.net>
+ *
  * Copyright (C) 2011-2012 One Laptop per Child Foundation.
  *
  * Licensed under the GPL v2 or later.
@@ -12,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/list.h>
 #include <linux/olpc-ec.h>
 #include <asm/olpc.h>
@@ -326,8 +328,4 @@ static int __init olpc_ec_init_module(void)
 {
        return platform_driver_register(&olpc_ec_plat_driver);
 }
-
 arch_initcall(olpc_ec_init_module);
-
-MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
-MODULE_LICENSE("GPL");
index 63b371d..91ae585 100644 (file)
@@ -1,6 +1,8 @@
 /* Moorestown PMIC GPIO (access through IPC) driver
  * Copyright (c) 2008 - 2009, Intel Corporation.
  *
+ * Author: Alek Du <alek.du@intel.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -21,7 +23,6 @@
 
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -322,9 +323,4 @@ static int __init platform_pmic_gpio_init(void)
 {
        return platform_driver_register(&platform_pmic_gpio_driver);
 }
-
 subsys_initcall(platform_pmic_gpio_init);
-
-MODULE_AUTHOR("Alek Du <alek.du@intel.com>");
-MODULE_DESCRIPTION("Intel Moorestown PMIC GPIO driver");
-MODULE_LICENSE("GPL v2");
index ee4f183..344a3ba 100644 (file)
@@ -268,18 +268,19 @@ static int setup_interrupt(int gpio)
                return err;
 
        irq = gpio_to_irq(gpio);
+       if (irq < 0)
+               return irq;
 
-       if (NO_IRQ == irq)
-               return NO_IRQ;
-
-       if (irq_set_irq_type(irq, IRQF_TRIGGER_FALLING)) {
+       err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING);
+       if (err) {
                pr_err("cannot set trigger type for irq %d\n", irq);
-               return NO_IRQ;
+               return err;
        }
 
-       if (request_irq(irq, isr, 0, DRIVER, &ixp_clock)) {
+       err = request_irq(irq, isr, 0, DRIVER, &ixp_clock);
+       if (err) {
                pr_err("request_irq failed for irq %d\n", irq);
-               return NO_IRQ;
+               return err;
        }
 
        return irq;
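This rework replaces the ambiguous NO_IRQ sentinel (0 on most architectures, -1 on a few) with ordinary negative errno returns. A hedged sketch of the resulting caller contract (foo_clock_init() is illustrative, not part of the patch):

    /* Illustrative caller of the patched setup_interrupt(): a negative
     * return is now a real errno from gpio_to_irq(), irq_set_irq_type()
     * or request_irq(), never a platform-dependent NO_IRQ value. */
    static int foo_clock_init(int gpio)
    {
            int irq = setup_interrupt(gpio);

            if (irq < 0)
                    return irq;     /* propagate -EINVAL, -EBUSY, ... */
            return 0;
    }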
index 32f0f01..9d19b9a 100644 (file)
@@ -1161,7 +1161,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
                } else if (ibw_start < (ib_win->rstart + ib_win->size) &&
                           (ibw_start + ibw_size) > ib_win->rstart) {
                        /* Return error if address translation involved */
-                       if (direct && ib_win->xlat) {
+                       if (!direct || ib_win->xlat) {
                                ret = -EFAULT;
                                break;
                        }
index b2daa66..c9ff261 100644 (file)
@@ -2,7 +2,7 @@
  * max14577.c - Regulator driver for the Maxim 14577/77836
  *
  * Copyright (C) 2013,2014 Samsung Electronics
- * Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ * Krzysztof Kozlowski <krzk@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -331,7 +331,7 @@ static void __exit max14577_regulator_exit(void)
 }
 module_exit(max14577_regulator_exit);
 
-MODULE_AUTHOR("Krzysztof Kozlowski <k.kozlowski@samsung.com>");
+MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:max14577-regulator");
index de730fd..cfbb951 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2013-2015 Samsung Electronics
  * Jonghwa Lee <jonghwa3.lee@samsung.com>
- * Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
+ * Krzysztof Kozlowski <krzk@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -314,5 +314,5 @@ module_exit(max77693_pmic_cleanup);
 
 MODULE_DESCRIPTION("MAXIM 77693/77843 regulator driver");
 MODULE_AUTHOR("Jonghwa Lee <jonghwa3.lee@samsung.com>");
-MODULE_AUTHOR("Krzysztof Kozlowski <k.kozlowski.k@gmail.com>");
+MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_LICENSE("GPL");
index 5022fa8..8ed46a9 100644 (file)
@@ -178,20 +178,21 @@ static const struct regulator_desc pma8084_hfsmps = {
 static const struct regulator_desc pma8084_ftsmps = {
        .linear_ranges = (struct regulator_linear_range[]) {
                REGULATOR_LINEAR_RANGE(350000,  0, 184, 5000),
-               REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
+               REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
        },
        .n_linear_ranges = 2,
-       .n_voltages = 340,
+       .n_voltages = 262,
        .ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pma8084_pldo = {
        .linear_ranges = (struct regulator_linear_range[]) {
-               REGULATOR_LINEAR_RANGE(750000,  0,  30, 25000),
-               REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
+               REGULATOR_LINEAR_RANGE( 750000,  0,  63, 12500),
+               REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
+               REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
        },
-       .n_linear_ranges = 2,
-       .n_voltages = 100,
+       .n_linear_ranges = 3,
+       .n_voltages = 164,
        .ops = &rpm_smps_ldo_ops,
 };
 
@@ -221,29 +222,30 @@ static const struct regulator_desc pm8x41_hfsmps = {
 static const struct regulator_desc pm8841_ftsmps = {
        .linear_ranges = (struct regulator_linear_range[]) {
                REGULATOR_LINEAR_RANGE(350000,  0, 184, 5000),
-               REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
+               REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
        },
        .n_linear_ranges = 2,
-       .n_voltages = 340,
+       .n_voltages = 262,
        .ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pm8941_boost = {
        .linear_ranges = (struct regulator_linear_range[]) {
-               REGULATOR_LINEAR_RANGE(4000000, 0, 15, 100000),
+               REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000),
        },
        .n_linear_ranges = 1,
-       .n_voltages = 16,
+       .n_voltages = 31,
        .ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pm8941_pldo = {
        .linear_ranges = (struct regulator_linear_range[]) {
-               REGULATOR_LINEAR_RANGE( 750000,  0,  30, 25000),
-               REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
+               REGULATOR_LINEAR_RANGE( 750000,  0,  63, 12500),
+               REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
+               REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
        },
-       .n_linear_ranges = 2,
-       .n_voltages = 100,
+       .n_linear_ranges = 3,
+       .n_voltages = 164,
        .ops = &rpm_smps_ldo_ops,
 };
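For reference, REGULATOR_LINEAR_RANGE(min_uV, min_sel, max_sel, step_uV) maps a selector to min_uV + (sel - min_sel) * step_uV, and n_voltages is max_sel + 1 (hence 164 and 262 above). A standalone check of the corrected pm8941_pldo table (userspace model, not kernel code):

    #include <stdio.h>

    /* Userspace model of REGULATOR_LINEAR_RANGE(min_uV, min_sel,
     * max_sel, step_uV). */
    struct range { unsigned int min_uV, min_sel, max_sel, step_uV; };

    static const struct range pm8941_pldo[] = {  /* corrected table above */
            {  750000,   0,  63, 12500 },
            { 1550000,  64, 126, 25000 },
            { 3100000, 127, 163, 50000 },
    };

    int main(void)
    {
            unsigned int sel = 70;  /* arbitrary selector in range 1 */
            printf("%u uV\n", pm8941_pldo[1].min_uV +
                   (sel - pm8941_pldo[1].min_sel) * pm8941_pldo[1].step_uV);
            return 0;               /* prints "1700000 uV" */
    }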
 
index b381b37..5648b71 100644 (file)
@@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
        struct fib *fibptr;
        struct hw_fib * hw_fib = (struct hw_fib *)0;
        dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
-       unsigned size;
+       unsigned int size, osize;
        int retval;
 
        if (dev->in_reset) {
@@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
         *      will not overrun the buffer when we copy the memory. Return
         *      an error if we would.
         */
-       size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
+       osize = size = le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr);
        if (size < le16_to_cpu(kfib->header.SenderSize))
                size = le16_to_cpu(kfib->header.SenderSize);
        if (size > dev->max_fib_size) {
@@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
                goto cleanup;
        }
 
+       /* Sanity check the second copy */
+       if ((osize != le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr))
+               || (size < le16_to_cpu(kfib->header.SenderSize))) {
+               retval = -EINVAL;
+               goto cleanup;
+       }
+
        if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
                aac_adapter_interrupt(dev);
                /*
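The added check closes a classic double-fetch window: the size fields are read from user memory once and validated, the full FIB is then copied again, and a racing user thread could grow the fields between the two copies. In outline, assuming the two copy_from_user() calls in this function (fib_still_valid() is an illustrative restatement, not the patch's helper):

    #include <linux/errno.h>

    /* Sketch of the re-validation above: 'osize' and 'size' come from
     * the first fetch; after the second, full copy the header must
     * still describe the same, already-validated sizes. */
    static int fib_still_valid(unsigned int osize, unsigned int size,
                               unsigned int hdr_size_now,
                               unsigned int sender_size_now)
    {
            if (osize != hdr_size_now)      /* header.Size raced */
                    return -EINVAL;
            if (size < sender_size_now)     /* SenderSize raced */
                    return -EINVAL;
            return 0;
    }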
index 83458f7..6dc96c8 100644 (file)
@@ -361,8 +361,9 @@ static const char * const snstext[] = {
 
 /* Get sense key string or NULL if not available */
 const char *
-scsi_sense_key_string(unsigned char key) {
-       if (key <= 0xE)
+scsi_sense_key_string(unsigned char key)
+{
+       if (key < ARRAY_SIZE(snstext))
                return snstext[key];
        return NULL;
 }
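Replacing the magic 0xE with ARRAY_SIZE() ties the bounds check to the table itself, so adding a sense key can never silently desynchronize the check. The idiom in miniature (standalone, illustrative names):

    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const char *const names[] = { "NO SENSE", "RECOVERED", "NOT READY" };

    static const char *name_string(unsigned char key)
    {
            if (key < ARRAY_SIZE(names))    /* tracks the table size */
                    return names[key];
            return NULL;
    }

    int main(void)
    {
            printf("%s\n", name_string(1));              /* RECOVERED */
            printf("%p\n", (void *)name_string(9));      /* NULL: out of range */
            return 0;
    }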
index a569c65..dcf3653 100644 (file)
@@ -2923,7 +2923,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
        mutex_unlock(&fip->ctlr_mutex);
 
 drop:
-       kfree(skb);
+       kfree_skb(skb);
        return rc;
 }
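An sk_buff is not a bare kmalloc() object: it carries a destructor, fragment pages and shared info, all of which only kfree_skb() releases. A minimal sketch of the corrected drop path:

    #include <linux/skbuff.h>

    /* Illustrative drop path: kfree_skb() releases the skb's data,
     * fragments and state; plain kfree() on an skb leaks those and
     * corrupts the dedicated skb slab caches. */
    static void drop_frame(struct sk_buff *skb)
    {
            kfree_skb(skb);
    }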
 
index 2dab3dc..c1ed25a 100644 (file)
@@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
        /* Find first memory bar */
        bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM);
        instance->bar = find_first_bit(&bar_list, sizeof(unsigned long));
-       if (pci_request_selected_regions(instance->pdev, instance->bar,
+       if (pci_request_selected_regions(instance->pdev, 1<<instance->bar,
                                         "megasas: LSI")) {
                dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n");
                return -EBUSY;
@@ -5339,7 +5339,7 @@ fail_ready_state:
        iounmap(instance->reg_set);
 
       fail_ioremap:
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 
        return -EINVAL;
 }
@@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance)
 
        iounmap(instance->reg_set);
 
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
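pci_request_selected_regions() takes a bitmask of BARs (as produced by pci_select_bars()), not a BAR index, hence the 1 << instance->bar conversions above; passing the index 2 as a mask would have requested BAR 1 instead of BAR 2. A hedged fragment (foo_request_bar() is illustrative):

    #include <linux/pci.h>

    /* The 'bars' argument is a mask: request exactly BAR 'bar'. */
    static int foo_request_bar(struct pci_dev *pdev, int bar)
    {
            return pci_request_selected_regions(pdev, 1 << bar, "foo");
    }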
index ec83754..52d8bbf 100644 (file)
@@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance)
 
        iounmap(instance->reg_set);
 
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
index 751f13e..750f82c 100644 (file)
@@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
        } else
                ioc->msix96_vector = 0;
 
+       if (ioc->is_warpdrive) {
+               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
+                   &ioc->chip->ReplyPostHostIndex;
+
+               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
+                       ioc->reply_post_host_index[i] =
+                       (resource_size_t __iomem *)
+                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
+                       * 4)));
+       }
+
        list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
                pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
                    reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
@@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        if (r)
                goto out_free_resources;
 
-       if (ioc->is_warpdrive) {
-               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
-                   &ioc->chip->ReplyPostHostIndex;
-
-               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
-                       ioc->reply_post_host_index[i] =
-                       (resource_size_t __iomem *)
-                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
-                       * 4)));
-       }
-
        pci_set_drvdata(ioc->pdev, ioc->shost);
        r = _base_get_ioc_facts(ioc, CAN_SLEEP);
        if (r)
index eaccd65..2464569 100644 (file)
@@ -246,6 +246,10 @@ static struct {
        {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
        {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
        {"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+       {"STK", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+       {"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+       {"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+       {"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
        {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
        {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
        {"SONY", "TSL", NULL, BLIST_FORCELUN},          /* DDS3 & DDS4 autoloaders */
index 3f0ff07..60b651b 100644 (file)
@@ -340,22 +340,6 @@ static int do_sas_phy_delete(struct device *dev, void *data)
        return 0;
 }
 
-/**
- * is_sas_attached - check if device is SAS attached
- * @sdev: scsi device to check
- *
- * returns true if the device is SAS attached
- */
-int is_sas_attached(struct scsi_device *sdev)
-{
-       struct Scsi_Host *shost = sdev->host;
-
-       return shost->transportt->host_attrs.ac.class ==
-               &sas_host_class.class;
-}
-EXPORT_SYMBOL(is_sas_attached);
-
-
 /**
  * sas_remove_children  -  tear down a device's SAS data structures
  * @dev:       device belonging to the sas object
index 53ef1cb..8c9a35c 100644 (file)
@@ -587,7 +587,7 @@ static void ses_match_to_enclosure(struct enclosure_device *edev,
 
        ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0);
 
-       if (is_sas_attached(sdev))
+       if (scsi_is_sas_rphy(&sdev->sdev_gendev))
                efd.addr = sas_get_address(sdev);
 
        if (efd.addr) {
@@ -778,6 +778,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
        if (!edev)
                return;
 
+       enclosure_unregister(edev);
+
        ses_dev = edev->scratch;
        edev->scratch = NULL;
 
@@ -789,7 +791,6 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
        kfree(edev->component[0].scratch);
 
        put_device(&edev->edev);
-       enclosure_unregister(edev);
 }
 
 static void ses_intf_remove(struct device *cdev,
index e3da1a2..2a9da2e 100644 (file)
@@ -962,7 +962,7 @@ static void wd719x_pci_remove(struct pci_dev *pdev)
        scsi_host_put(sh);
 }
 
-static DEFINE_PCI_DEVICE_TABLE(wd719x_pci_table) = {
+static const struct pci_device_id wd719x_pci_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_WD, 0x3296) },
        {}
 };
index 823cbc9..7a37090 100644 (file)
@@ -720,8 +720,6 @@ static int img_spfi_remove(struct platform_device *pdev)
                clk_disable_unprepare(spfi->sys_clk);
        }
 
-       spi_master_put(master);
-
        return 0;
 }
 
index 0be89e0..899d7a8 100644 (file)
@@ -685,7 +685,6 @@ static int mtk_spi_remove(struct platform_device *pdev)
        pm_runtime_disable(&pdev->dev);
 
        mtk_spi_reset(mdata);
-       spi_master_put(master);
 
        return 0;
 }
index f3df522..58d2d48 100644 (file)
@@ -214,6 +214,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
                return PTR_ERR(ssp->clk);
 
        memset(&pi, 0, sizeof(pi));
+       pi.fwnode = dev->dev.fwnode;
        pi.parent = &dev->dev;
        pi.name = "pxa2xx-spi";
        pi.id = ssp->port_id;
index c338ef1..7f15556 100644 (file)
@@ -1030,7 +1030,6 @@ static int spi_qup_remove(struct platform_device *pdev)
 
        pm_runtime_put_noidle(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-       spi_master_put(master);
 
        return 0;
 }
index 0f83ad1..1de3a77 100644 (file)
@@ -262,6 +262,9 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
 
        for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
                brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
+               /* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
+               if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
+                       continue;
                if (brps <= 32) /* max of brdv is 32 */
                        break;
        }
index 51ad42f..200ca22 100644 (file)
@@ -960,7 +960,7 @@ static int spi_transfer_one_message(struct spi_master *master,
        struct spi_transfer *xfer;
        bool keep_cs = false;
        int ret = 0;
-       unsigned long ms = 1;
+       unsigned long long ms = 1;
        struct spi_statistics *statm = &master->statistics;
        struct spi_statistics *stats = &msg->spi->statistics;
 
@@ -991,9 +991,13 @@ static int spi_transfer_one_message(struct spi_master *master,
 
                        if (ret > 0) {
                                ret = 0;
-                               ms = xfer->len * 8 * 1000 / xfer->speed_hz;
+                               ms = 8LL * 1000LL * xfer->len;
+                               do_div(ms, xfer->speed_hz);
                                ms += ms + 100; /* some tolerance */
 
+                               if (ms > UINT_MAX)
+                                       ms = UINT_MAX;
+
                                ms = wait_for_completion_timeout(&master->xfer_completion,
                                                                 msecs_to_jiffies(ms));
                        }
@@ -1159,6 +1163,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread)
                if (ret < 0) {
                        dev_err(&master->dev, "Failed to power device: %d\n",
                                ret);
+                       mutex_unlock(&master->io_mutex);
                        return;
                }
        }
@@ -1174,6 +1179,7 @@ static void __spi_pump_messages(struct spi_master *master, bool in_kthread)
 
                        if (master->auto_runtime_pm)
                                pm_runtime_put(master->dev.parent);
+                       mutex_unlock(&master->io_mutex);
                        return;
                }
        }
index d7dd1e5..9f525ff 100644 (file)
@@ -196,6 +196,7 @@ static int pci1760_pwm_ns_to_div(unsigned int flags, unsigned int ns)
                break;
        case CMDF_ROUND_DOWN:
                divisor = ns / PCI1760_PWM_TIMEBASE;
+               break;
        default:
                return -EINVAL;
        }
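The one-line fix restores a missing break: without it, the CMDF_ROUND_DOWN case computed its divisor and then fell straight through into default's return -EINVAL. The bug class in miniature:

    #include <stdio.h>

    int main(void)
    {
            int x = 2, r = 0;

            switch (x) {
            case 2:
                    r = 42;
                    break;          /* without this, control falls into default */
            default:
                    r = -22;        /* -EINVAL */
            }
            printf("%d\n", r);      /* 42; -22 if the break is removed */
            return 0;
    }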
index 4ab1866..ec5b9a2 100644 (file)
 
 #define N_CHANS 8
 
-enum waveform_state_bits {
-       WAVEFORM_AI_RUNNING,
-       WAVEFORM_AO_RUNNING
-};
-
 /* Data unique to this driver */
 struct waveform_private {
        struct timer_list ai_timer;     /* timer for AI commands */
@@ -68,7 +63,6 @@ struct waveform_private {
        unsigned int wf_amplitude;      /* waveform amplitude in microvolts */
        unsigned int wf_period;         /* waveform period in microseconds */
        unsigned int wf_current;        /* current time in waveform period */
-       unsigned long state_bits;
        unsigned int ai_scan_period;    /* AI scan period in usec */
        unsigned int ai_convert_period; /* AI conversion period in usec */
        struct timer_list ao_timer;     /* timer for AO commands */
@@ -191,10 +185,6 @@ static void waveform_ai_timer(unsigned long arg)
        unsigned int nsamples;
        unsigned int time_increment;
 
-       /* check command is still active */
-       if (!test_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits))
-               return;
-
        now = ktime_to_us(ktime_get());
        nsamples = comedi_nsamples_left(s, UINT_MAX);
 
@@ -386,11 +376,6 @@ static int waveform_ai_cmd(struct comedi_device *dev,
         */
        devpriv->ai_timer.expires =
                jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1;
-
-       /* mark command as active */
-       smp_mb__before_atomic();
-       set_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
-       smp_mb__after_atomic();
        add_timer(&devpriv->ai_timer);
        return 0;
 }
@@ -400,11 +385,12 @@ static int waveform_ai_cancel(struct comedi_device *dev,
 {
        struct waveform_private *devpriv = dev->private;
 
-       /* mark command as no longer active */
-       clear_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
-       smp_mb__after_atomic();
-       /* cannot call del_timer_sync() as may be called from timer routine */
-       del_timer(&devpriv->ai_timer);
+       if (in_softirq()) {
+               /* Assume we were called from the timer routine itself. */
+               del_timer(&devpriv->ai_timer);
+       } else {
+               del_timer_sync(&devpriv->ai_timer);
+       }
        return 0;
 }
 
@@ -436,10 +422,6 @@ static void waveform_ao_timer(unsigned long arg)
        u64 scans_since;
        unsigned int scans_avail = 0;
 
-       /* check command is still active */
-       if (!test_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits))
-               return;
-
        /* determine number of scan periods since last time */
        now = ktime_to_us(ktime_get());
        scans_since = now - devpriv->ao_last_scan_time;
@@ -518,11 +500,6 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev,
        devpriv->ao_last_scan_time = ktime_to_us(ktime_get());
        devpriv->ao_timer.expires =
                jiffies + usecs_to_jiffies(devpriv->ao_scan_period);
-
-       /* mark command as active */
-       smp_mb__before_atomic();
-       set_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
-       smp_mb__after_atomic();
        add_timer(&devpriv->ao_timer);
 
        return 1;
@@ -608,11 +585,12 @@ static int waveform_ao_cancel(struct comedi_device *dev,
        struct waveform_private *devpriv = dev->private;
 
        s->async->inttrig = NULL;
-       /* mark command as no longer active */
-       clear_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
-       smp_mb__after_atomic();
-       /* cannot call del_timer_sync() as may be called from timer routine */
-       del_timer(&devpriv->ao_timer);
+       if (in_softirq()) {
+               /* Assume we were called from the timer routine itself. */
+               del_timer(&devpriv->ao_timer);
+       } else {
+               del_timer_sync(&devpriv->ao_timer);
+       }
        return 0;
 }
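del_timer_sync() waits for a running handler to finish, so calling it from the timer handler itself would deadlock; the cancel paths above therefore use in_softirq() as a heuristic for "called from the timer" and fall back to plain del_timer() there. The pattern as a hedged sketch (assumes the only softirq context reaching this code is the timer callback):

    #include <linux/interrupt.h>
    #include <linux/timer.h>

    static void cancel_cmd_timer(struct timer_list *t)
    {
            if (in_softirq())
                    del_timer(t);           /* we may be the handler itself */
            else
                    del_timer_sync(t);      /* process context: safe to wait */
    }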
 
index 65daef0..0f4eb95 100644 (file)
@@ -634,7 +634,7 @@ static const void *daqboard2000_find_boardinfo(struct comedi_device *dev,
        const struct daq200_boardtype *board;
        int i;
 
-       if (pcidev->subsystem_device != PCI_VENDOR_ID_IOTECH)
+       if (pcidev->subsystem_vendor != PCI_VENDOR_ID_IOTECH)
                return NULL;
 
        for (i = 0; i < ARRAY_SIZE(boardtypes); i++) {
index 904f637..8bbd938 100644 (file)
@@ -588,8 +588,8 @@ static int dt2811_attach(struct comedi_device *dev, struct comedi_devconfig *it)
        s = &dev->subdevices[0];
        s->type         = COMEDI_SUBD_AI;
        s->subdev_flags = SDF_READABLE |
-                         (it->options[2] == 1) ? SDF_DIFF :
-                         (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND;
+                         ((it->options[2] == 1) ? SDF_DIFF :
+                          (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND);
        s->n_chan       = (it->options[2] == 1) ? 8 : 16;
        s->maxdata      = 0x0fff;
        s->range_table  = board->is_pgh ? &dt2811_pgh_ai_ranges
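The added parentheses fix an operator-precedence bug: ?: binds more loosely than |, so "SDF_READABLE | cond ? a : b" parsed as "(SDF_READABLE | cond) ? a : b", discarding the SDF_READABLE bit entirely. In miniature:

    #include <stdio.h>

    int main(void)
    {
            int A = 4, B = 1, C = 2, cond = 0;

            /* parses as (A | cond) ? B : C, so A's bits are lost */
            printf("%d\n", A | cond ? B : C);       /* 1 */
            /* the intended A | (cond ? B : C) */
            printf("%d\n", A | (cond ? B : C));     /* 6 */
            return 0;
    }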
index 8dabb19..0f97d7b 100644 (file)
@@ -2772,7 +2772,15 @@ static int ni_ao_inttrig(struct comedi_device *dev,
        int i;
        static const int timeout = 1000;
 
-       if (trig_num != cmd->start_arg)
+       /*
+        * Require trig_num == cmd->start_arg when cmd->start_src == TRIG_INT.
+        * For backwards compatibility, also allow trig_num == 0 when
+        * cmd->start_src != TRIG_INT (i.e. when cmd->start_src == TRIG_EXT);
+        * in that case, the internal trigger is being used as a pre-trigger
+        * before the external trigger.
+        */
+       if (!(trig_num == cmd->start_arg ||
+             (trig_num == 0 && cmd->start_src != TRIG_INT)))
                return -EINVAL;
 
        /*
@@ -5480,7 +5488,7 @@ static int ni_E_init(struct comedi_device *dev,
                s->maxdata      = (devpriv->is_m_series) ? 0xffffffff
                                                         : 0x00ffffff;
                s->insn_read    = ni_tio_insn_read;
-               s->insn_write   = ni_tio_insn_read;
+               s->insn_write   = ni_tio_insn_write;
                s->insn_config  = ni_tio_insn_config;
 #ifdef PCIDMA
                if (dev->irq && devpriv->mite) {
index 170ac98..24c348d 100644 (file)
@@ -419,6 +419,7 @@ static ssize_t ad5933_store(struct device *dev,
        mutex_lock(&indio_dev->mlock);
        switch ((u32)this_attr->address) {
        case AD5933_OUT_RANGE:
+               ret = -EINVAL;
                for (i = 0; i < 4; i++)
                        if (val == st->range_avail[i]) {
                                st->ctrl_hb &= ~AD5933_CTRL_RANGE(0x3);
@@ -426,7 +427,6 @@ static ssize_t ad5933_store(struct device *dev,
                                ret = ad5933_cmd(st, 0);
                                break;
                        }
-               ret = -EINVAL;
                break;
        case AD5933_IN_PGA_GAIN:
                if (sysfs_streq(buf, "1")) {
index 3664bfd..2c4dc69 100644 (file)
@@ -388,6 +388,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
        struct inode *inode = NULL;
        __u64 bits = 0;
        int rc = 0;
+       struct dentry *alias;
 
        /* NB 1 request reference will be taken away by ll_intent_lock()
         * when I return
@@ -412,26 +413,12 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                 */
        }
 
-       /* Only hash *de if it is unhashed (new dentry).
-        * Atoimc_open may passing hashed dentries for open.
-        */
-       if (d_unhashed(*de)) {
-               struct dentry *alias;
-
-               alias = ll_splice_alias(inode, *de);
-               if (IS_ERR(alias)) {
-                       rc = PTR_ERR(alias);
-                       goto out;
-               }
-               *de = alias;
-       } else if (!it_disposition(it, DISP_LOOKUP_NEG)  &&
-                  !it_disposition(it, DISP_OPEN_CREATE)) {
-               /* With DISP_OPEN_CREATE dentry will be
-                * instantiated in ll_create_it.
-                */
-               LASSERT(!d_inode(*de));
-               d_instantiate(*de, inode);
+       alias = ll_splice_alias(inode, *de);
+       if (IS_ERR(alias)) {
+               rc = PTR_ERR(alias);
+               goto out;
        }
+       *de = alias;
 
        if (!it_disposition(it, DISP_LOOKUP_NEG)) {
                /* we have lookup look - unhide dentry */
@@ -587,6 +574,24 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
               dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
               *opened);
 
+       /* Only negative dentries enter here */
+       LASSERT(!d_inode(dentry));
+
+       if (!d_in_lookup(dentry)) {
+               /* A valid negative dentry that just passed revalidation,
+                * there's little point in trying to open it server-side,
+                * even though there's a minuscule chance it might succeed.
+                * Either way it's a valid race to just return -ENOENT here.
+                */
+               if (!(open_flags & O_CREAT))
+                       return -ENOENT;
+
+               /* Otherwise we just unhash it to be rehashed afresh via
+                * lookup if necessary
+                */
+               d_drop(dentry);
+       }
+
        it = kzalloc(sizeof(*it), GFP_NOFS);
        if (!it)
                return -ENOMEM;
index 0b1760c..78f524f 100644 (file)
@@ -3363,7 +3363,7 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler)
                if (!hif_workqueue) {
                        netdev_err(vif->ndev, "Failed to create workqueue\n");
                        result = -ENOMEM;
-                       goto _fail_mq_;
+                       goto _fail_;
                }
 
                setup_timer(&periodic_rssi, GetPeriodicRSSI,
@@ -3391,7 +3391,6 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler)
 
        clients_count++;
 
-_fail_mq_:
        destroy_workqueue(hif_workqueue);
 _fail_:
        return result;
index 3a66255..3221511 100644 (file)
@@ -648,7 +648,7 @@ void wilc1000_wlan_deinit(struct net_device *dev)
                        mutex_unlock(&wl->hif_cs);
                }
                if (&wl->txq_event)
-                       wait_for_completion(&wl->txq_event);
+                       complete(&wl->txq_event);
 
                wlan_deinitialize_threads(dev);
                deinit_irq(dev);
index 9092600..2c2e8ac 100644 (file)
@@ -1191,7 +1191,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev,
        struct wilc_priv *priv;
        struct wilc_vif *vif;
        u32 i = 0;
-       u32 associatedsta = 0;
+       u32 associatedsta = ~0;
        u32 inactive_time = 0;
        priv = wiphy_priv(wiphy);
        vif = netdev_priv(dev);
@@ -1204,7 +1204,7 @@ static int get_station(struct wiphy *wiphy, struct net_device *dev,
                        }
                }
 
-               if (associatedsta == -1) {
+               if (associatedsta == ~0) {
                        netdev_err(dev, "sta required is not associated\n");
                        return -ENOENT;
                }
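The old code initialized associatedsta, a u32, to 0, which is itself a valid station index, and then tested for -1, so the "not associated" branch was unreachable. Using ~0 as both initializer and sentinel fixes that. A compact illustration:

    #include <stdio.h>

    int main(void)
    {
            unsigned int idx = 0;           /* old init: also a valid index */

            if (idx == ~0u)                 /* old "not found" test never fired */
                    puts("not found");

            idx = ~0u;                      /* fixed sentinel */
            if (idx == ~0u)
                    puts("not found");      /* now reachable */
            return 0;
    }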
index 0ae0b13..2fb1bf1 100644 (file)
@@ -24,6 +24,7 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 
+#include <libcxgb_cm.h>
 #include "cxgbit.h"
 #include "clip_tbl.h"
 
@@ -72,15 +73,6 @@ out:
        return wr_waitp->ret;
 }
 
-/* Returns whether a CPL status conveys negative advice.
- */
-static int cxgbit_is_neg_adv(unsigned int status)
-{
-       return status == CPL_ERR_RTX_NEG_ADVICE ||
-               status == CPL_ERR_PERSIST_NEG_ADVICE ||
-               status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
 static int cxgbit_np_hashfn(const struct cxgbit_np *cnp)
 {
        return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1);
@@ -623,21 +615,14 @@ void cxgbit_free_np(struct iscsi_np *np)
 static void cxgbit_send_halfclose(struct cxgbit_sock *csk)
 {
        struct sk_buff *skb;
-       struct cpl_close_con_req *req;
-       unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16);
+       u32 len = roundup(sizeof(struct cpl_close_con_req), 16);
 
        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                return;
 
-       req = (struct cpl_close_con_req *)__skb_put(skb, len);
-       memset(req, 0, len);
-
-       set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
-       INIT_TP_WR(req, csk->tid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
-                                                   csk->tid));
-       req->rsvd = 0;
+       cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx,
+                             NULL, NULL);
 
        cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL;
        __skb_queue_tail(&csk->txq, skb);
@@ -662,9 +647,8 @@ static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb)
 
 static int cxgbit_send_abort_req(struct cxgbit_sock *csk)
 {
-       struct cpl_abort_req *req;
-       unsigned int len = roundup(sizeof(*req), 16);
        struct sk_buff *skb;
+       u32 len = roundup(sizeof(struct cpl_abort_req), 16);
 
        pr_debug("%s: csk %p tid %u; state %d\n",
                 __func__, csk, csk->tid, csk->com.state);
@@ -675,15 +659,9 @@ static int cxgbit_send_abort_req(struct cxgbit_sock *csk)
                cxgbit_send_tx_flowc_wr(csk);
 
        skb = __skb_dequeue(&csk->skbq);
-       req = (struct cpl_abort_req *)__skb_put(skb, len);
-       memset(req, 0, len);
+       cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx,
+                         csk->com.cdev, cxgbit_abort_arp_failure);
 
-       set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
-       t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure);
-       INIT_TP_WR(req, csk->tid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ,
-                                                   csk->tid));
-       req->cmd = CPL_ABORT_SEND_RST;
        return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t);
 }
 
@@ -789,109 +767,6 @@ void _cxgbit_free_csk(struct kref *kref)
        kfree(csk);
 }
 
-static void
-cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype,
-                     __u8 *local_ip, __u8 *peer_ip, __be16 *local_port,
-                     __be16 *peer_port)
-{
-       u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-       u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-       struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
-       struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
-       struct tcphdr *tcp = (struct tcphdr *)
-                             ((u8 *)(req + 1) + eth_len + ip_len);
-
-       if (ip->version == 4) {
-               pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n",
-                        __func__,
-                        ntohl(ip->saddr), ntohl(ip->daddr),
-                        ntohs(tcp->source),
-                        ntohs(tcp->dest));
-               *iptype = 4;
-               memcpy(peer_ip, &ip->saddr, 4);
-               memcpy(local_ip, &ip->daddr, 4);
-       } else {
-               pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n",
-                        __func__,
-                        ip6->saddr.s6_addr, ip6->daddr.s6_addr,
-                        ntohs(tcp->source),
-                        ntohs(tcp->dest));
-               *iptype = 6;
-               memcpy(peer_ip, ip6->saddr.s6_addr, 16);
-               memcpy(local_ip, ip6->daddr.s6_addr, 16);
-       }
-
-       *peer_port = tcp->source;
-       *local_port = tcp->dest;
-}
-
-static int
-cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev)
-{
-       u8 i;
-
-       egress_dev = cxgbit_get_real_dev(egress_dev);
-       for (i = 0; i < cdev->lldi.nports; i++)
-               if (cdev->lldi.ports[i] == egress_dev)
-                       return 1;
-       return 0;
-}
-
-static struct dst_entry *
-cxgbit_find_route6(struct cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip,
-                  __be16 local_port, __be16 peer_port, u8 tos,
-                  __u32 sin6_scope_id)
-{
-       struct dst_entry *dst = NULL;
-
-       if (IS_ENABLED(CONFIG_IPV6)) {
-               struct flowi6 fl6;
-
-               memset(&fl6, 0, sizeof(fl6));
-               memcpy(&fl6.daddr, peer_ip, 16);
-               memcpy(&fl6.saddr, local_ip, 16);
-               if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
-                       fl6.flowi6_oif = sin6_scope_id;
-               dst = ip6_route_output(&init_net, NULL, &fl6);
-               if (!dst)
-                       goto out;
-               if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) &&
-                   !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
-                       dst_release(dst);
-                       dst = NULL;
-               }
-       }
-out:
-       return dst;
-}
-
-static struct dst_entry *
-cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip,
-                 __be16 local_port, __be16 peer_port, u8 tos)
-{
-       struct rtable *rt;
-       struct flowi4 fl4;
-       struct neighbour *n;
-
-       rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip,
-                                  local_ip,
-                                  peer_port, local_port, IPPROTO_TCP,
-                                  tos, 0);
-       if (IS_ERR(rt))
-               return NULL;
-       n = dst_neigh_lookup(&rt->dst, &peer_ip);
-       if (!n)
-               return NULL;
-       if (!cxgbit_our_interface(cdev, n->dev) &&
-           !(n->dev->flags & IFF_LOOPBACK)) {
-               neigh_release(n);
-               dst_release(&rt->dst);
-               return NULL;
-       }
-       neigh_release(n);
-       return &rt->dst;
-}
-
 static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi)
 {
        unsigned int linkspeed;
@@ -1072,21 +947,14 @@ int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb)
 
 static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid)
 {
-       struct cpl_tid_release *req;
-       unsigned int len = roundup(sizeof(*req), 16);
+       u32 len = roundup(sizeof(struct cpl_tid_release), 16);
        struct sk_buff *skb;
 
        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                return;
 
-       req = (struct cpl_tid_release *)__skb_put(skb, len);
-       memset(req, 0, len);
-
-       INIT_TP_WR(req, tid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(
-                  CPL_TID_RELEASE, tid));
-       set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+       cxgb_mk_tid_release(skb, len, tid, 0);
        cxgbit_ofld_send(cdev, skb);
 }
 
@@ -1108,20 +976,6 @@ cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb,
        return ret < 0 ? ret : 0;
 }
 
-static void
-cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu,
-               unsigned int *idx, int use_ts, int ipv6)
-{
-       unsigned short hdr_size = (ipv6 ? sizeof(struct ipv6hdr) :
-                                  sizeof(struct iphdr)) +
-                                  sizeof(struct tcphdr) +
-                                  (use_ts ? round_up(TCPOLEN_TIMESTAMP,
-                                   4) : 0);
-       unsigned short data_size = mtu - hdr_size;
-
-       cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
-}
-
 static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb)
 {
        if (csk->com.state != CSK_STATE_ESTABLISHED) {
@@ -1140,22 +994,18 @@ static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb)
 int cxgbit_rx_data_ack(struct cxgbit_sock *csk)
 {
        struct sk_buff *skb;
-       struct cpl_rx_data_ack *req;
-       unsigned int len = roundup(sizeof(*req), 16);
+       u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16);
+       u32 credit_dack;
 
        skb = alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return -1;
 
-       req = (struct cpl_rx_data_ack *)__skb_put(skb, len);
-       memset(req, 0, len);
+       credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) |
+                     RX_CREDITS_V(csk->rx_credits);
 
-       set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx);
-       INIT_TP_WR(req, csk->tid);
-       OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
-                                                   csk->tid));
-       req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) |
-                                      RX_CREDITS_V(csk->rx_credits));
+       cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx,
+                           credit_dack);
 
        csk->rx_credits = 0;
 
@@ -1210,15 +1060,6 @@ out:
        return -ENOMEM;
 }
 
-static u32 cxgbit_compute_wscale(u32 win)
-{
-       u32 wscale = 0;
-
-       while (wscale < 14 && (65535 << wscale) < win)
-               wscale++;
-       return wscale;
-}
-
 static void
 cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
 {
@@ -1246,10 +1087,10 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
        INIT_TP_WR(rpl5, csk->tid);
        OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
                                                     csk->tid));
-       cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx,
-                       req->tcpopt.tstamp,
-                       (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
-       wscale = cxgbit_compute_wscale(csk->rcv_win);
+       cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx,
+                     req->tcpopt.tstamp,
+                     (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+       wscale = cxgb_compute_wscale(csk->rcv_win);
        /*
         * Specify the largest window that will fit in opt0. The
         * remainder will be specified in the rx_data_ack.
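The shared cxgb_compute_wscale() used here picks the smallest TCP window-scale shift (capped at 14) such that the 16-bit window field covers the receive window; it has the same body as the local helper removed above. Standalone:

    #include <stdio.h>

    static unsigned int compute_wscale(unsigned int win)
    {
            unsigned int wscale = 0;

            while (wscale < 14 && (65535U << wscale) < win)
                    wscale++;
            return wscale;
    }

    int main(void)
    {
            printf("%u\n", compute_wscale(256 * 1024));  /* prints 3 */
            return 0;
    }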
@@ -1340,8 +1181,8 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb)
                goto rel_skb;
        }
 
-       cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip,
-                             &local_port, &peer_port);
+       cxgb_get_4tuple(req, cdev->lldi.adapter_type, &iptype, local_ip,
+                       peer_ip, &local_port, &peer_port);
 
        /* Find output route */
        if (iptype == 4)  {
@@ -1350,21 +1191,23 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb)
                         , __func__, cnp, tid,
                         local_ip, peer_ip, ntohs(local_port),
                         ntohs(peer_port), peer_mss);
-               dst = cxgbit_find_route(cdev, *(__be32 *)local_ip,
-                                       *(__be32 *)peer_ip,
-                                       local_port, peer_port,
-                                       PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
+               dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev,
+                                     *(__be32 *)local_ip,
+                                     *(__be32 *)peer_ip,
+                                     local_port, peer_port,
+                                     PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
        } else {
                pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 "
                         "lport %d rport %d peer_mss %d\n"
                         , __func__, cnp, tid,
                         local_ip, peer_ip, ntohs(local_port),
                         ntohs(peer_port), peer_mss);
-               dst = cxgbit_find_route6(cdev, local_ip, peer_ip,
-                                        local_port, peer_port,
-                                        PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
-                                        ((struct sockaddr_in6 *)
-                                        &cnp->com.local_addr)->sin6_scope_id);
+               dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev,
+                                      local_ip, peer_ip,
+                                      local_port, peer_port,
+                                      PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+                                      ((struct sockaddr_in6 *)
+                                       &cnp->com.local_addr)->sin6_scope_id);
        }
        if (!dst) {
                pr_err("%s - failed to find dst entry!\n",
@@ -1795,16 +1638,15 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb)
 {
        struct cpl_abort_req_rss *hdr = cplhdr(skb);
        unsigned int tid = GET_TID(hdr);
-       struct cpl_abort_rpl *rpl;
        struct sk_buff *rpl_skb;
        bool release = false;
        bool wakeup_thread = false;
-       unsigned int len = roundup(sizeof(*rpl), 16);
+       u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
        pr_debug("%s: csk %p; tid %u; state %d\n",
                 __func__, csk, tid, csk->com.state);
 
-       if (cxgbit_is_neg_adv(hdr->status)) {
+       if (cxgb_is_neg_adv(hdr->status)) {
                pr_err("%s: got neg advise %d on tid %u\n",
                       __func__, hdr->status, tid);
                goto rel_skb;
@@ -1839,14 +1681,8 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb)
                cxgbit_send_tx_flowc_wr(csk);
 
        rpl_skb = __skb_dequeue(&csk->skbq);
-       set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
-
-       rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len);
-       memset(rpl, 0, len);
 
-       INIT_TP_WR(rpl, csk->tid);
-       OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
-       rpl->cmd = CPL_ABORT_NO_RST;
+       cxgb_mk_abort_rpl(rpl_skb, len, csk->tid, csk->txq_idx);
        cxgbit_ofld_send(csk->com.cdev, rpl_skb);
 
        if (wakeup_thread) {
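
The hunk above replaces the driver-private CPL_ABORT_RPL construction
with the shared libcxgb helper; note the removed code also set the TX
queue on the incoming skb rather than on rpl_skb, which the single
helper call straightens out. Reconstructed from the removed lines, the
helper plausibly reduces to this sketch (the real libcxgb_cm.h may
differ in detail):

    static inline void
    cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
    {
            struct cpl_abort_rpl *rpl;

            rpl = (struct cpl_abort_rpl *)__skb_put(skb, len);
            memset(rpl, 0, len);

            INIT_TP_WR(rpl, tid);
            OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
            rpl->cmd = CPL_ABORT_NO_RST;
            set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
    }
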
index 3788ed7..a32b417 100644 (file)
@@ -740,12 +740,22 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 }
 
 /* Bind cpufreq callbacks to thermal cooling device ops */
+
 static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
        .get_max_state = cpufreq_get_max_state,
        .get_cur_state = cpufreq_get_cur_state,
        .set_cur_state = cpufreq_set_cur_state,
 };
 
+static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
+       .get_max_state          = cpufreq_get_max_state,
+       .get_cur_state          = cpufreq_get_cur_state,
+       .set_cur_state          = cpufreq_set_cur_state,
+       .get_requested_power    = cpufreq_get_requested_power,
+       .state2power            = cpufreq_state2power,
+       .power2state            = cpufreq_power2state,
+};
+
 /* Notifier for cpufreq policy change */
 static struct notifier_block thermal_cpufreq_notifier_block = {
        .notifier_call = cpufreq_thermal_notifier,
@@ -795,6 +805,7 @@ __cpufreq_cooling_register(struct device_node *np,
        struct cpumask temp_mask;
        unsigned int freq, i, num_cpus;
        int ret;
+       struct thermal_cooling_device_ops *cooling_ops;
 
        cpumask_and(&temp_mask, clip_cpus, cpu_online_mask);
        policy = cpufreq_cpu_get(cpumask_first(&temp_mask));
@@ -850,10 +861,6 @@ __cpufreq_cooling_register(struct device_node *np,
        cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 
        if (capacitance) {
-               cpufreq_cooling_ops.get_requested_power =
-                       cpufreq_get_requested_power;
-               cpufreq_cooling_ops.state2power = cpufreq_state2power;
-               cpufreq_cooling_ops.power2state = cpufreq_power2state;
                cpufreq_dev->plat_get_static_power = plat_static_func;
 
                ret = build_dyn_power_table(cpufreq_dev, capacitance);
@@ -861,6 +868,10 @@ __cpufreq_cooling_register(struct device_node *np,
                        cool_dev = ERR_PTR(ret);
                        goto free_table;
                }
+
+               cooling_ops = &cpufreq_power_cooling_ops;
+       } else {
+               cooling_ops = &cpufreq_cooling_ops;
        }
 
        ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
@@ -885,7 +896,7 @@ __cpufreq_cooling_register(struct device_node *np,
                 cpufreq_dev->id);
 
        cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-                                                     &cpufreq_cooling_ops);
+                                                     cooling_ops);
        if (IS_ERR(cool_dev))
                goto remove_idr;
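
The change above stops patching the shared cpufreq_cooling_ops at
registration time, which left the one static table in whatever state
the last registered device put it in. Instead it provides two complete
ops tables and picks a pointer per device. A self-contained miniature
of the pattern, with hypothetical names:

    struct cool_ops {
            int (*get_state)(void);
            int (*get_power)(void); /* only in the power-aware table */
    };

    static int get_state(void) { return 0; }
    static int get_power(void) { return 0; }

    static const struct cool_ops plain_ops = {
            .get_state = get_state,
    };

    static const struct cool_ops power_ops = {
            .get_state = get_state,
            .get_power = get_power,
    };

    /* Pick an ops table instead of mutating one. */
    static const struct cool_ops *pick_ops(int capacitance)
    {
            return capacitance ? &power_ops : &plain_ops;
    }
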
 
index c5547bd..e473548 100644 (file)
@@ -471,8 +471,6 @@ MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
 
 static int imx_thermal_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id =
-               of_match_device(of_imx_thermal_match, &pdev->dev);
        struct imx_thermal_data *data;
        struct regmap *map;
        int measure_freq;
@@ -490,7 +488,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
        }
        data->tempmon = map;
 
-       data->socdata = of_id->data;
+       data->socdata = of_device_get_match_data(&pdev->dev);
 
        /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
        if (data->socdata->version == TEMPMON_IMX6SX) {
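
of_device_get_match_data() folds the of_match_device()-then-->data
lookup into one call, which is what lets the hunk drop the local
of_id. Roughly (simplified from the in-kernel helper):

    static const void *of_device_get_match_data_sketch(struct device *dev)
    {
            const struct of_device_id *match;

            match = of_match_device(dev->driver->of_match_table, dev);
            return match ? match->data : NULL;
    }
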
index a578cd2..1891f34 100644 (file)
@@ -225,7 +225,6 @@ static struct platform_driver int3406_thermal_driver = {
        .remove = int3406_thermal_remove,
        .driver = {
                   .name = "int3406 thermal",
-                  .owner = THIS_MODULE,
                   .acpi_match_table = int3406_thermal_match,
                   },
 };
index 71a3392..5f81792 100644 (file)
@@ -504,6 +504,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
                if (IS_ERR(priv->zone)) {
                        dev_err(dev, "can't register thermal zone\n");
                        ret = PTR_ERR(priv->zone);
+                       priv->zone = NULL;
                        goto error_unregister;
                }
 
index 9c15344..a8c2041 100644 (file)
@@ -648,6 +648,12 @@ static struct pci_device_id nhi_ids[] = {
                .device = PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
                .subvendor = 0x2222, .subdevice = 0x1111,
        },
+       {
+               .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
+               .vendor = PCI_VENDOR_ID_INTEL,
+               .device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI,
+               .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
+       },
        {
                .class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
                .vendor = PCI_VENDOR_ID_INTEL,
index 1e116f5..9840fde 100644 (file)
@@ -372,7 +372,9 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, u64 route)
 
        if (sw->config.device_id != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
            sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
-           sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE)
+           sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE &&
+           sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE &&
+           sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE)
                tb_sw_warn(sw, "unsupported switch device id %#x\n",
                           sw->config.device_id);
 
index 122e0e4..1a16fea 100644 (file)
@@ -15,8 +15,6 @@
 #include <linux/serial_reg.h>
 #include <linux/dmaengine.h>
 
-#include "../serial_mctrl_gpio.h"
-
 struct uart_8250_dma {
        int (*tx_dma)(struct uart_8250_port *p);
        int (*rx_dma)(struct uart_8250_port *p);
@@ -133,43 +131,12 @@ void serial8250_em485_destroy(struct uart_8250_port *p);
 
 static inline void serial8250_out_MCR(struct uart_8250_port *up, int value)
 {
-       int mctrl_gpio = 0;
-
        serial_out(up, UART_MCR, value);
-
-       if (value & UART_MCR_RTS)
-               mctrl_gpio |= TIOCM_RTS;
-       if (value & UART_MCR_DTR)
-               mctrl_gpio |= TIOCM_DTR;
-
-       mctrl_gpio_set(up->gpios, mctrl_gpio);
 }
 
 static inline int serial8250_in_MCR(struct uart_8250_port *up)
 {
-       int mctrl, mctrl_gpio = 0;
-
-       mctrl = serial_in(up, UART_MCR);
-
-       /* save current MCR values */
-       if (mctrl & UART_MCR_RTS)
-               mctrl_gpio |= TIOCM_RTS;
-       if (mctrl & UART_MCR_DTR)
-               mctrl_gpio |= TIOCM_DTR;
-
-       mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio);
-
-       if (mctrl_gpio & TIOCM_RTS)
-               mctrl |= UART_MCR_RTS;
-       else
-               mctrl &= ~UART_MCR_RTS;
-
-       if (mctrl_gpio & TIOCM_DTR)
-               mctrl |= UART_MCR_DTR;
-       else
-               mctrl &= ~UART_MCR_DTR;
-
-       return mctrl;
+       return serial_in(up, UART_MCR);
 }
 
 #if defined(__alpha__) && !defined(CONFIG_PCI)
index 13ad5c3..dcf43f6 100644 (file)
@@ -974,8 +974,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 
        uart = serial8250_find_match_or_unused(&up->port);
        if (uart && uart->port.type != PORT_8250_CIR) {
-               struct mctrl_gpios *gpios;
-
                if (uart->port.dev)
                        uart_remove_one_port(&serial8250_reg, &uart->port);
 
@@ -1013,13 +1011,6 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
                if (up->port.flags & UPF_FIXED_TYPE)
                        uart->port.type = up->port.type;
 
-               gpios = mctrl_gpio_init(&uart->port, 0);
-               if (IS_ERR(gpios)) {
-                       if (PTR_ERR(gpios) != -ENOSYS)
-                               return PTR_ERR(gpios);
-               } else
-                       uart->gpios = gpios;
-
                serial8250_set_defaults(uart);
 
                /* Possibly override default I/O functions.  */
index 737b4b3..0facc78 100644 (file)
@@ -31,7 +31,7 @@
 #define IO_ADDR2 0x60
 #define LDN 0x7
 
-#define IRQ_MODE       0x70
+#define FINTEK_IRQ_MODE        0x70
 #define IRQ_SHARE      BIT(4)
 #define IRQ_MODE_MASK  (BIT(6) | BIT(5))
 #define IRQ_LEVEL_LOW  0
@@ -195,7 +195,7 @@ static int fintek_8250_set_irq_mode(struct fintek_8250 *pdata, bool level_mode)
        outb(LDN, pdata->base_port + ADDR_PORT);
        outb(pdata->index, pdata->base_port + DATA_PORT);
 
-       outb(IRQ_MODE, pdata->base_port + ADDR_PORT);
+       outb(FINTEK_IRQ_MODE, pdata->base_port + ADDR_PORT);
        tmp = inb(pdata->base_port + DATA_PORT);
 
        tmp &= ~IRQ_MODE_MASK;
index 339de9c..20c5db2 100644 (file)
@@ -168,6 +168,9 @@ static void mid8250_set_termios(struct uart_port *p,
        unsigned long w = BIT(24) - 1;
        unsigned long mul, div;
 
+       /* Gracefully handle the B0 case: fall back to B9600 */
+       fuart = fuart ? fuart : 9600 * 16;
+
        if (mid->board->freq < fuart) {
                /* Find prescaler value that satisfies Fuart < Fref */
                if (mid->board->freq > baud)
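
The added fallback covers termios B0 ("hang up"): the requested baud
is 0, so fuart, derived earlier in this function as baud times the 16x
oversampling factor, is also 0 and the prescaler/divisor arithmetic
further down would end up dividing by zero. Worked through as plain C:

    unsigned long baud  = 0;             /* B0 */
    unsigned long fuart = baud * 16;     /* 0: would break the divider */

    fuart = fuart ? fuart : 9600 * 16;   /* fall back to 153600 Hz */
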
index e14982f..61ad6c3 100644 (file)
@@ -134,21 +134,18 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
 
        serial8250_do_set_mctrl(port, mctrl);
 
-       if (IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios,
-                                               UART_GPIO_RTS))) {
-               /*
-                * Turn off autoRTS if RTS is lowered and restore autoRTS
-                * setting if RTS is raised
-                */
-               lcr = serial_in(up, UART_LCR);
-               serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-               if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
-                       priv->efr |= UART_EFR_RTS;
-               else
-                       priv->efr &= ~UART_EFR_RTS;
-               serial_out(up, UART_EFR, priv->efr);
-               serial_out(up, UART_LCR, lcr);
-       }
+       /*
+        * Turn off autoRTS if RTS is lowered and restore autoRTS setting
+        * if RTS is raised
+        */
+       lcr = serial_in(up, UART_LCR);
+       serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+       if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
+               priv->efr |= UART_EFR_RTS;
+       else
+               priv->efr &= ~UART_EFR_RTS;
+       serial_out(up, UART_EFR, priv->efr);
+       serial_out(up, UART_LCR, lcr);
 }
 
 /*
@@ -449,9 +446,7 @@ static void omap_8250_set_termios(struct uart_port *port,
        priv->efr = 0;
        up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF);
 
-       if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW
-               && IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios,
-                                                       UART_GPIO_RTS))) {
+       if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW) {
                /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */
                up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS;
                priv->efr |= UART_EFR_CTS;
index 20ebaea..bc51b32 100644 (file)
@@ -1950,6 +1950,43 @@ pci_wch_ch38x_setup(struct serial_private *priv,
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7954       0x7954
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7958       0x7958
 
+#define PCI_VENDOR_ID_ACCESIO                  0x494f
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB    0x1051
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S     0x1053
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB    0x105C
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S     0x105E
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB  0x1091
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2   0x1093
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB  0x1099
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4   0x109B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB   0x10D1
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM    0x10D3
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB   0x10DA
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM    0x10DC
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1   0x1108
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2   0x1110
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2   0x1111
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4   0x1118
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4   0x1119
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S      0x1152
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S      0x115A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2    0x1190
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2   0x1191
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4    0x1198
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4   0x1199
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM     0x11D0
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4    0x105A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4    0x105B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8    0x106A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8    0x106B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4    0x1098
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8    0x10A9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM     0x10D9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM     0x10E9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM     0x11D8
+
 /* Unknown vendors/cards - this should not be in linux/pci_ids.h */
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584        0x1584
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588        0x1588
@@ -5112,6 +5149,108 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_ANY_ID, PCI_ANY_ID,
                0,
                0, pbn_pericom_PI7C9X7958 },
+       /*
+        * ACCES I/O Products quad and octal serial cards
+        */
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7954 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
+       {       PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM,
+               PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               pbn_pericom_PI7C9X7958 },
        /*
         * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke)
         */
index 7481b95..bdfa659 100644 (file)
@@ -1618,8 +1618,6 @@ static void serial8250_disable_ms(struct uart_port *port)
        if (up->bugs & UART_BUG_NOMSR)
                return;
 
-       mctrl_gpio_disable_ms(up->gpios);
-
        up->ier &= ~UART_IER_MSI;
        serial_port_out(port, UART_IER, up->ier);
 }
@@ -1632,8 +1630,6 @@ static void serial8250_enable_ms(struct uart_port *port)
        if (up->bugs & UART_BUG_NOMSR)
                return;
 
-       mctrl_gpio_enable_ms(up->gpios);
-
        up->ier |= UART_IER_MSI;
 
        serial8250_rpm_get(up);
@@ -1917,8 +1913,7 @@ unsigned int serial8250_do_get_mctrl(struct uart_port *port)
                ret |= TIOCM_DSR;
        if (status & UART_MSR_CTS)
                ret |= TIOCM_CTS;
-
-       return mctrl_gpio_get(up->gpios, &ret);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl);
 
index c9ec839..7c6f7af 100644 (file)
@@ -6,7 +6,6 @@
 config SERIAL_8250
        tristate "8250/16550 and compatible serial support"
        select SERIAL_CORE
-       select SERIAL_MCTRL_GPIO if GPIOLIB
        ---help---
          This selects whether you want to include the driver for the standard
          serial ports.  The standard answer is Y.  People who might say N
index 065f5d9..b933568 100644 (file)
@@ -949,6 +949,15 @@ static int isr_setup_status_phase(struct ci_hdrc *ci)
        int retval;
        struct ci_hw_ep *hwep;
 
+       /*
+        * Unexpected USB controller behavior, caused by bad signal integrity
+        * or ground reference problems, can lead to isr_setup_status_phase
+        * being called with ci->status equal to NULL.
+        * If this situation occurs, you should review your USB hardware design.
+        */
+       if (WARN_ON_ONCE(!ci->status))
+               return -EPIPE;
+
        hwep = (ci->ep0_dir == TX) ? ci->ep0out : ci->ep0in;
        ci->status->context = ci;
        ci->status->complete = isr_setup_status_complete;
@@ -1596,8 +1605,11 @@ static int ci_udc_pullup(struct usb_gadget *_gadget, int is_on)
 {
        struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget);
 
-       /* Data+ pullup controlled by OTG state machine in OTG fsm mode */
-       if (ci_otg_is_fsm_mode(ci))
+       /*
+        * Data+ pullup controlled by OTG state machine in OTG fsm mode;
+        * and don't touch Data+ in host mode for dual role config.
+        */
+       if (ci_otg_is_fsm_mode(ci) || ci->role == CI_ROLE_HOST)
                return 0;
 
        pm_runtime_get_sync(&ci->gadget.dev);
index 7191230..0f3f62e 100644 (file)
@@ -1354,7 +1354,6 @@ made_compressed_probe:
        spin_lock_init(&acm->write_lock);
        spin_lock_init(&acm->read_lock);
        mutex_init(&acm->mutex);
-       acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
        acm->is_int_ep = usb_endpoint_xfer_int(epread);
        if (acm->is_int_ep)
                acm->bInterval = epread->bInterval;
@@ -1394,14 +1393,14 @@ made_compressed_probe:
                urb->transfer_dma = rb->dma;
                if (acm->is_int_ep) {
                        usb_fill_int_urb(urb, acm->dev,
-                                        acm->rx_endpoint,
+                                        usb_rcvintpipe(usb_dev, epread->bEndpointAddress),
                                         rb->base,
                                         acm->readsize,
                                         acm_read_bulk_callback, rb,
                                         acm->bInterval);
                } else {
                        usb_fill_bulk_urb(urb, acm->dev,
-                                         acm->rx_endpoint,
+                                         usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
                                          rb->base,
                                          acm->readsize,
                                          acm_read_bulk_callback, rb);
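
The deleted acm->rx_endpoint cached a single pipe value built with
usb_rcvbulkpipe() and reused it for interrupt URBs too. A pipe encodes
the transfer type alongside the direction and the device/endpoint
numbers, so the cached value was simply wrong for the interrupt case;
rebuilding it per URB type fixes that. Simplified from linux/usb.h:

    /*
     * usb_rcvintpipe(dev, ep)  ~ (PIPE_INTERRUPT << 30) | dev/ep bits | USB_DIR_IN
     * usb_rcvbulkpipe(dev, ep) ~ (PIPE_BULK      << 30) | dev/ep bits | USB_DIR_IN
     *
     * Same device and endpoint, different pipe type bits.
     */
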
index 05ce308..1f1eabf 100644 (file)
@@ -96,7 +96,6 @@ struct acm {
        struct acm_rb read_buffers[ACM_NR];
        struct acm_wb *putbuffer;                       /* for acm_tty_put_char() */
        int rx_buflimit;
-       int rx_endpoint;
        spinlock_t read_lock;
        int write_used;                                 /* number of non-empty write buffers */
        int transmitting;
index 31ccdcc..15ce4ab 100644 (file)
@@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
                                                        ep, buffer, size);
 }
 
+static const unsigned short low_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 8,
+       [USB_ENDPOINT_XFER_ISOC] = 0,
+       [USB_ENDPOINT_XFER_BULK] = 0,
+       [USB_ENDPOINT_XFER_INT] = 8,
+};
+static const unsigned short full_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1023,
+       [USB_ENDPOINT_XFER_BULK] = 64,
+       [USB_ENDPOINT_XFER_INT] = 64,
+};
+static const unsigned short high_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 512,
+       [USB_ENDPOINT_XFER_INT] = 1024,
+};
+static const unsigned short super_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 512,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 1024,
+       [USB_ENDPOINT_XFER_INT] = 1024,
+};
+
 static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
     int asnum, struct usb_host_interface *ifp, int num_ep,
     unsigned char *buffer, int size)
@@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
        struct usb_endpoint_descriptor *d;
        struct usb_host_endpoint *endpoint;
        int n, i, j, retval;
+       unsigned int maxp;
+       const unsigned short *maxpacket_maxes;
 
        d = (struct usb_endpoint_descriptor *) buffer;
        buffer += d->bLength;
@@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                        endpoint->desc.wMaxPacketSize = cpu_to_le16(8);
        }
 
+       /* Validate the wMaxPacketSize field */
+       maxp = usb_endpoint_maxp(&endpoint->desc);
+
+       /* Find the highest legal maxpacket size for this endpoint */
+       i = 0;          /* additional transactions per microframe */
+       switch (to_usb_device(ddev)->speed) {
+       case USB_SPEED_LOW:
+               maxpacket_maxes = low_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_FULL:
+               maxpacket_maxes = full_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_HIGH:
+               /* Bits 12..11 are allowed only for HS periodic endpoints */
+               if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
+                       i = maxp & (BIT(12) | BIT(11));
+                       maxp &= ~i;
+               }
+               /* fallthrough */
+       default:
+               maxpacket_maxes = high_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_SUPER:
+       case USB_SPEED_SUPER_PLUS:
+               maxpacket_maxes = super_speed_maxpacket_maxes;
+               break;
+       }
+       j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
+
+       if (maxp > j) {
+               dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
+                   cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
+               maxp = j;
+               endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
+       }
+
        /*
         * Some buggy high speed devices have bulk endpoints using
         * maxpacket sizes other than 512.  High speed HCDs may not
@@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
         */
        if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
                        && usb_endpoint_xfer_bulk(d)) {
-               unsigned maxp;
-
-               maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff;
                if (maxp != 512)
                        dev_warn(ddev, "config %d interface %d altsetting %d "
                                "bulk endpoint 0x%X has invalid maxpacket %d\n",
index e9f5043..09c8d9c 100644 (file)
@@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
                goto error_decrease_mem;
        }
 
-       mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle);
+       mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
+                       &dma_handle);
        if (!mem) {
                ret = -ENOMEM;
                goto error_free_usbm;
@@ -1708,11 +1709,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
        as->urb->start_frame = uurb->start_frame;
        as->urb->number_of_packets = number_of_packets;
        as->urb->stream_id = stream_id;
-       if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
-                       ps->dev->speed == USB_SPEED_HIGH)
-               as->urb->interval = 1 << min(15, ep->desc.bInterval - 1);
-       else
-               as->urb->interval = ep->desc.bInterval;
+
+       if (ep->desc.bInterval) {
+               if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
+                               ps->dev->speed == USB_SPEED_HIGH ||
+                               ps->dev->speed >= USB_SPEED_SUPER)
+                       as->urb->interval = 1 <<
+                                       min(15, ep->desc.bInterval - 1);
+               else
+                       as->urb->interval = ep->desc.bInterval;
+       }
+
        as->urb->context = as;
        as->urb->complete = async_completed;
        for (totlen = u = 0; u < number_of_packets; u++) {
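
The interval rework above matters for two reasons. First, for
isochronous URBs and for high-speed (and now also SuperSpeed) devices,
bInterval is an exponent: the service interval is 2^(bInterval-1)
(micro)frames, capped at 2^15, whereas other speeds use bInterval as a
raw frame count. Second, a descriptor with bInterval == 0 previously
reached 1 << min(15, -1), an undefined shift, hence the new guard. A
worked example for a high-speed endpoint:

    int bInterval = 4;
    int interval  = 1 << (bInterval - 1); /* 8 microframes = 8 * 125 us = 1 ms */
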
@@ -2582,7 +2589,9 @@ static unsigned int usbdev_poll(struct file *file,
        if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed))
                mask |= POLLOUT | POLLWRNORM;
        if (!connected(ps))
-               mask |= POLLERR | POLLHUP;
+               mask |= POLLHUP;
+       if (list_empty(&ps->list))
+               mask |= POLLERR;
        return mask;
 }
 
index bee1351..1d5fc32 100644 (file)
@@ -1052,14 +1052,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 
        /* Continue a partial initialization */
        if (type == HUB_INIT2 || type == HUB_INIT3) {
-               device_lock(hub->intfdev);
+               device_lock(&hdev->dev);
 
                /* Was the hub disconnected while we were waiting? */
-               if (hub->disconnected) {
-                       device_unlock(hub->intfdev);
-                       kref_put(&hub->kref, hub_release);
-                       return;
-               }
+               if (hub->disconnected)
+                       goto disconnected;
                if (type == HUB_INIT2)
                        goto init2;
                goto init3;
@@ -1262,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
                        queue_delayed_work(system_power_efficient_wq,
                                        &hub->init_work,
                                        msecs_to_jiffies(delay));
-                       device_unlock(hub->intfdev);
+                       device_unlock(&hdev->dev);
                        return;         /* Continues at init3: below */
                } else {
                        msleep(delay);
@@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
        /* Scan all ports that need attention */
        kick_hub_wq(hub);
 
-       /* Allow autosuspend if it was suppressed */
-       if (type <= HUB_INIT3)
+       if (type == HUB_INIT2 || type == HUB_INIT3) {
+               /* Allow autosuspend if it was suppressed */
+ disconnected:
                usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
-
-       if (type == HUB_INIT2 || type == HUB_INIT3)
-               device_unlock(hub->intfdev);
+               device_unlock(&hdev->dev);
+       }
 
        kref_put(&hub->kref, hub_release);
 }
@@ -1315,8 +1312,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
        struct usb_device *hdev = hub->hdev;
        int i;
 
-       cancel_delayed_work_sync(&hub->init_work);
-
        /* hub_wq and related activity won't re-trigger */
        hub->quiescing = 1;
 
index 9fae029..d645512 100644 (file)
@@ -868,6 +868,7 @@ struct dwc2_hsotg {
        void *priv;
        int     irq;
        struct clk *clk;
+       struct reset_control *reset;
 
        unsigned int queuing_high_bandwidth:1;
        unsigned int srp_success:1;
index fc6f525..530959a 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_data/s3c-hsotg.h>
+#include <linux/reset.h>
 
 #include <linux/usb/of.h>
 
@@ -337,6 +338,24 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
 {
        int i, ret;
 
+       hsotg->reset = devm_reset_control_get_optional(hsotg->dev, "dwc2");
+       if (IS_ERR(hsotg->reset)) {
+               ret = PTR_ERR(hsotg->reset);
+               switch (ret) {
+               case -ENOENT:
+               case -ENOTSUPP:
+                       hsotg->reset = NULL;
+                       break;
+               default:
+                       dev_err(hsotg->dev, "error getting reset control %d\n",
+                               ret);
+                       return ret;
+               }
+       }
+
+       if (hsotg->reset)
+               reset_control_deassert(hsotg->reset);
+
        /* Set default UTMI width */
        hsotg->phyif = GUSBCFG_PHYIF16;
 
@@ -434,6 +453,9 @@ static int dwc2_driver_remove(struct platform_device *dev)
        if (hsotg->ll_hw_enabled)
                dwc2_lowlevel_hw_disable(hsotg);
 
+       if (hsotg->reset)
+               reset_control_assert(hsotg->reset);
+
        return 0;
 }
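
The dwc2 probe treats the reset control as genuinely optional: on this
kernel devm_reset_control_get_optional() still returns an error
pointer when nothing is wired up, so -ENOENT (no matching "resets"
entry in DT) and -ENOTSUPP (reset framework not built in) are mapped
to "no reset line" while any other error fails the probe. The
resulting lifecycle, summarized from the two hunks above:

    /*
     * probe:  devm_reset_control_get_optional(dev, "dwc2")
     *           -ENOENT / -ENOTSUPP -> hsotg->reset = NULL (no reset line)
     *           other error         -> bail out of probe
     *           success             -> reset_control_deassert(hsotg->reset)
     *
     * remove: if (hsotg->reset)
     *                 reset_control_assert(hsotg->reset);
     */
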
 
index 9466431..35d0924 100644 (file)
@@ -1192,6 +1192,7 @@ static int dwc3_runtime_resume(struct device *dev)
        }
 
        pm_runtime_mark_last_busy(dev);
+       pm_runtime_put(dev);
 
        return 0;
 }
index 22dfc3d..33ab2a2 100644 (file)
@@ -192,7 +192,7 @@ dwc3_ep_event_string(const struct dwc3_event_depevt *event)
        int ret;
 
        ret = sprintf(str, "ep%d%s: ", epnum >> 1,
-                       (epnum & 1) ? "in" : "in");
+                       (epnum & 1) ? "in" : "out");
        if (ret < 0)
                return "UNKNOWN";
 
index 9743353..e56d59b 100644 (file)
@@ -61,6 +61,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev)
        if (!simple->clks)
                return -ENOMEM;
 
+       platform_set_drvdata(pdev, simple);
        simple->dev = dev;
 
        for (i = 0; i < simple->num_clocks; i++) {
index 45f5a23..6df0f5d 100644 (file)
@@ -37,6 +37,7 @@
 #define PCI_DEVICE_ID_INTEL_BXT                        0x0aaa
 #define PCI_DEVICE_ID_INTEL_BXT_M              0x1aaa
 #define PCI_DEVICE_ID_INTEL_APL                        0x5aaa
+#define PCI_DEVICE_ID_INTEL_KBP                        0xa2b0
 
 static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
 static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -227,6 +228,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
        {  }    /* Terminating Entry */
 };
@@ -241,6 +243,15 @@ static int dwc3_pci_runtime_suspend(struct device *dev)
        return -EBUSY;
 }
 
+static int dwc3_pci_runtime_resume(struct device *dev)
+{
+       struct platform_device *dwc3 = dev_get_drvdata(dev);
+
+       return pm_runtime_get(&dwc3->dev);
+}
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PM_SLEEP
 static int dwc3_pci_pm_dummy(struct device *dev)
 {
        /*
@@ -253,11 +264,11 @@ static int dwc3_pci_pm_dummy(struct device *dev)
         */
        return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static struct dev_pm_ops dwc3_pci_dev_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_pm_dummy, dwc3_pci_pm_dummy)
-       SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_pm_dummy,
+       SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_runtime_resume,
                NULL)
 };
 
index 8f8c215..122e64d 100644 (file)
@@ -829,7 +829,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
        if (!req->request.no_interrupt && !chain)
                trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI;
 
-       if (last)
+       if (last && !usb_endpoint_xfer_isoc(dep->endpoint.desc))
                trb->ctrl |= DWC3_TRB_CTRL_LST;
 
        if (chain)
@@ -1433,7 +1433,7 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g)
 
 static int __dwc3_gadget_wakeup(struct dwc3 *dwc)
 {
-       unsigned long           timeout;
+       int                     retries;
 
        int                     ret;
        u32                     reg;
@@ -1484,9 +1484,9 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc)
        }
 
        /* poll until Link State changes to ON */
-       timeout = jiffies + msecs_to_jiffies(100);
+       retries = 20000;
 
-       while (!time_after(jiffies, timeout)) {
+       while (retries--) {
                reg = dwc3_readl(dwc->regs, DWC3_DSTS);
 
                /* in HS, means ON */
@@ -1955,7 +1955,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
 
 static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                struct dwc3_request *req, struct dwc3_trb *trb,
-               const struct dwc3_event_depevt *event, int status)
+               const struct dwc3_event_depevt *event, int status,
+               int chain)
 {
        unsigned int            count;
        unsigned int            s_pkt = 0;
@@ -1964,17 +1965,22 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
        dep->queued_requests--;
        trace_dwc3_complete_trb(dep, trb);
 
+       /*
+        * If we're in the middle of series of chained TRBs and we
+        * receive a short transfer along the way, DWC3 will skip
+        * through all TRBs including the last TRB in the chain (the
+        * where CHN bit is zero. DWC3 will also avoid clearing HWO
+        * bit and SW has to do it manually.
+        *
+        * We're going to do that here to avoid problems of HW trying
+        * to use bogus TRBs for transfers.
+        */
+       if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+
        if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-               /*
-                * We continue despite the error. There is not much we
-                * can do. If we don't clean it up we loop forever. If
-                * we skip the TRB then it gets overwritten after a
-                * while since we use them in a ring buffer. A BUG()
-                * would help. Lets hope that if this occurs, someone
-                * fixes the root cause instead of looking away :)
-                */
-               dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
-                               dep->name, trb);
+               return 1;
+
        count = trb->size & DWC3_TRB_SIZE_MASK;
 
        if (dep->direction) {
@@ -2013,15 +2019,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                        s_pkt = 1;
        }
 
-       /*
-        * We assume here we will always receive the entire data block
-        * which we should receive. Meaning, if we program RX to
-        * receive 4K but we receive only 2K, we assume that's all we
-        * should receive and we simply bounce the request back to the
-        * gadget driver for further processing.
-        */
-       req->request.actual += req->request.length - count;
-       if (s_pkt)
+       if (s_pkt && !chain)
                return 1;
        if ((event->status & DEPEVT_STATUS_LST) &&
                        (trb->ctrl & (DWC3_TRB_CTRL_LST |
@@ -2040,13 +2038,17 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
        struct dwc3_trb         *trb;
        unsigned int            slot;
        unsigned int            i;
+       int                     count = 0;
        int                     ret;
 
        do {
+               int chain;
+
                req = next_request(&dep->started_list);
                if (WARN_ON_ONCE(!req))
                        return 1;
 
+               chain = req->request.num_mapped_sgs > 0;
                i = 0;
                do {
                        slot = req->first_trb_index + i;
@@ -2054,13 +2056,22 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
                                slot++;
                        slot %= DWC3_TRB_NUM;
                        trb = &dep->trb_pool[slot];
+                       count += trb->size & DWC3_TRB_SIZE_MASK;
 
                        ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-                                       event, status);
+                                       event, status, chain);
                        if (ret)
                                break;
                } while (++i < req->request.num_mapped_sgs);
 
+               /*
+                * We assume here we will always receive the entire data block
+                * which we should receive. Meaning, if we program RX to
+                * receive 4K but we receive only 2K, we assume that's all we
+                * should receive and we simply bounce the request back to the
+                * gadget driver for further processing.
+                */
+               req->request.actual += req->request.length - count;
                dwc3_gadget_giveback(dep, req, status);
 
                if (ret)
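
The accounting rework relies on the TRB size field holding the
residue, i.e. the bytes the controller did not transfer. Summing that
residue over every TRB of the request (one per mapped sg entry when
chained) and subtracting once per request makes req->request.actual
correct for both the linear and scatter-gather cases. For example:

    unsigned length = 4096;            /* bytes queued for the request */
    unsigned count  = 2048;            /* residue summed over its TRBs  */
    unsigned actual = length - count;  /* 2048 bytes actually moved     */
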
index eb64848..5ebe6af 100644 (file)
@@ -1913,6 +1913,8 @@ unknown:
                        break;
 
                case USB_RECIP_ENDPOINT:
+                       if (!cdev->config)
+                               break;
                        endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f);
                        list_for_each_entry(f, &cdev->config->functions, list) {
                                if (test_bit(endp, f->endpoints))
@@ -2124,14 +2126,14 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
 
        cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL);
        if (!cdev->os_desc_req) {
-               ret = PTR_ERR(cdev->os_desc_req);
+               ret = -ENOMEM;
                goto end;
        }
 
        /* OS feature descriptor length <= 4kB */
        cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
        if (!cdev->os_desc_req->buf) {
-               ret = PTR_ERR(cdev->os_desc_req->buf);
+               ret = -ENOMEM;
                kfree(cdev->os_desc_req);
                goto end;
        }
index 70cf347..f9237fe 100644 (file)
@@ -1490,7 +1490,9 @@ void unregister_gadget_item(struct config_item *item)
 {
        struct gadget_info *gi = to_gadget_info(item);
 
+       mutex_lock(&gi->lock);
        unregister_gadget(gi);
+       mutex_unlock(&gi->lock);
 }
 EXPORT_SYMBOL_GPL(unregister_gadget_item);
 
index d58bfc3..007ec6e 100644 (file)
@@ -341,11 +341,15 @@ static struct sk_buff *eem_wrap(struct gether *port, struct sk_buff *skb)
 {
        struct sk_buff  *skb2 = NULL;
        struct usb_ep   *in = port->in_ep;
-       int             padlen = 0;
-       u16             len = skb->len;
+       int             headroom, tailroom, padlen = 0;
+       u16             len;
 
-       int headroom = skb_headroom(skb);
-       int tailroom = skb_tailroom(skb);
+       if (!skb)
+               return NULL;
+
+       len = skb->len;
+       headroom = skb_headroom(skb);
+       tailroom = skb_tailroom(skb);
 
        /* When ((len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) is 0,
         * stick two bytes of zero-length EEM packet on the end.
index c800582..16562e4 100644 (file)
@@ -374,6 +374,9 @@ static struct sk_buff *rndis_add_header(struct gether *port,
 {
        struct sk_buff *skb2;
 
+       if (!skb)
+               return NULL;
+
        skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type));
        rndis_add_hdr(skb2);
 
index 943c21a..ab6ac1b 100644 (file)
@@ -680,6 +680,12 @@ static int rndis_reset_response(struct rndis_params *params,
 {
        rndis_reset_cmplt_type *resp;
        rndis_resp_t *r;
+       u8 *xbuf;
+       u32 length;
+
+       /* drain the response queue */
+       while ((xbuf = rndis_get_next_response(params, &length)))
+               rndis_free_response(params, xbuf);
 
        r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type));
        if (!r)
index a3f7e7c..5f562c1 100644 (file)
@@ -556,7 +556,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
                        /* Multi frame CDC protocols may store the frame for
                         * later which is not a dropped frame.
                         */
-                       if (dev->port_usb->supports_multi_frame)
+                       if (dev->port_usb &&
+                                       dev->port_usb->supports_multi_frame)
                                goto multiframe;
                        goto drop;
                }
index 6ded634..e0cd1e4 100644 (file)
@@ -375,10 +375,15 @@ __acquires(&port->port_lock)
 */
 {
        struct list_head        *pool = &port->write_pool;
-       struct usb_ep           *in = port->port_usb->in;
+       struct usb_ep           *in;
        int                     status = 0;
        bool                    do_tty_wake = false;
 
+       if (!port->port_usb)
+               return status;
+
+       in = port->port_usb->in;
+
        while (!port->write_busy && !list_empty(pool)) {
                struct usb_request      *req;
                int                     len;
index 66753ba..31125a4 100644 (file)
@@ -2023,7 +2023,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src,
        if (!data) {
                kfree(*class_array);
                *class_array = NULL;
-               ret = PTR_ERR(data);
+               ret = -ENOMEM;
                goto unlock;
        }
        cl_arr = *class_array;
index aa3707b..16104b5 100644 (file)
@@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb,
         */
        spin_lock_irq(&epdata->dev->lock);
        value = -ENODEV;
-       if (unlikely(epdata->ep))
+       if (unlikely(epdata->ep == NULL))
                goto fail;
 
        req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
@@ -606,7 +606,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
        }
        if (is_sync_kiocb(iocb)) {
                value = ep_io(epdata, buf, len);
-               if (value >= 0 && copy_to_iter(buf, value, to))
+               if (value >= 0 && (copy_to_iter(buf, value, to) != value))
                        value = -EFAULT;
        } else {
                struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
index ff8685e..40c04bb 100644 (file)
@@ -827,7 +827,7 @@ void usb_gadget_unmap_request_by_dev(struct device *dev,
                return;
 
        if (req->num_mapped_sgs) {
-               dma_unmap_sg(dev, req->sg, req->num_mapped_sgs,
+               dma_unmap_sg(dev, req->sg, req->num_sgs,
                                is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
                req->num_mapped_sgs = 0;
@@ -1145,7 +1145,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
                        if (ret != -EPROBE_DEFER)
                                list_del(&driver->pending);
                        if (ret)
-                               goto err4;
+                               goto err5;
                        break;
                }
        }
@@ -1154,6 +1154,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        return 0;
 
+err5:
+       device_del(&udc->dev);
+
 err4:
        list_del(&udc->list);
        mutex_unlock(&udc_lock);
index 93d28cb..8bb011e 100644 (file)
@@ -1878,11 +1878,8 @@ static int qe_get_frame(struct usb_gadget *gadget)
 
        tmp = in_be16(&udc->usb_param->frame_n);
        if (tmp & 0x8000)
-               tmp = tmp & 0x07ff;
-       else
-               tmp = -EINVAL;
-
-       return (int)tmp;
+               return tmp & 0x07ff;
+       return -EINVAL;
 }
 
 static int fsl_qe_start(struct usb_gadget *gadget,
@@ -2053,7 +2050,7 @@ static void setup_received_handle(struct qe_udc *udc,
                        struct qe_ep *ep;
 
                        if (wValue != 0 || wLength != 0
-                               || pipe > USB_MAX_ENDPOINTS)
+                               || pipe >= USB_MAX_ENDPOINTS)
                                break;
                        ep = &udc->eps[pipe];
 
index 93a3bec..fb8fc34 100644 (file)
 
 /* DRD_CON */
 #define DRD_CON_PERI_CON       BIT(24)
+#define DRD_CON_VBOUT          BIT(0)
 
 /* USB_INT_ENA_1 and USB_INT_STA_1 */
 #define USB_INT_1_B3_PLLWKUP   BIT(31)
@@ -363,6 +364,7 @@ static void usb3_init_epc_registers(struct renesas_usb3 *usb3)
 {
        /* FIXME: How to change host / peripheral mode as well? */
        usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON);
+       usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON);
 
        usb3_write(usb3, ~0, USB3_USB_INT_STA_1);
        usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG);
index a962b89..1e5f529 100644 (file)
@@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci)
        int     port = HCS_N_PORTS(ehci->hcs_params);
 
        while (port--) {
-               ehci_writel(ehci, PORT_RWC_BITS,
-                               &ehci->regs->port_status[port]);
                spin_unlock_irq(&ehci->lock);
                ehci_port_power(ehci, port, false);
                spin_lock_irq(&ehci->lock);
+               ehci_writel(ehci, PORT_RWC_BITS,
+                               &ehci->regs->port_status[port]);
        }
 }
 
index c369c29..2f76900 100644 (file)
@@ -1675,7 +1675,7 @@ max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value)
        if (pin_number > 7)
                return;
 
-       mask = 1u << pin_number;
+       mask = 1u << (pin_number % 4);
        idx = pin_number / 4;
 
        if (value)
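
The mask fix follows from the idx = pin_number / 4 line already in the
driver: output pins are grouped four to a register, so the bit
position within a register must be pin_number % 4. Worked for pin 5:

    u8 pin_number = 5;
    u8 idx  = pin_number / 4;          /* 1: second register          */
    u8 mask = 1u << (pin_number % 4);  /* 0x02; the old code got 0x20 */
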
index d61fcc4..730b9fd 100644 (file)
@@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
 
        ret = 0;
        virt_dev = xhci->devs[slot_id];
+       if (!virt_dev)
+               return -ENODEV;
+
        cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
        if (!cmd) {
                xhci_dbg(xhci, "Couldn't allocate command structure.\n");
index 4fd041b..d7b0f97 100644 (file)
@@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev)
                usb_remove_hcd(xhci->shared_hcd);
                usb_put_hcd(xhci->shared_hcd);
        }
-       usb_hcd_pci_remove(dev);
 
        /* Workaround for spurious wakeups at shutdown with HSW */
        if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
                pci_set_power_state(dev, PCI_D3hot);
+
+       usb_hcd_pci_remove(dev);
 }
 
 #ifdef CONFIG_PM
index 918e0c7..797137e 100644 (file)
@@ -850,6 +850,10 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
        spin_lock_irqsave(&xhci->lock, flags);
 
        ep->stop_cmds_pending--;
+       if (xhci->xhc_state & XHCI_STATE_REMOVING) {
+               spin_unlock_irqrestore(&xhci->lock, flags);
+               return;
+       }
        if (xhci->xhc_state & XHCI_STATE_DYING) {
                xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                                "Stop EP timer ran, but another timer marked "
@@ -903,7 +907,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
        spin_unlock_irqrestore(&xhci->lock, flags);
        xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                        "Calling usb_hc_died()");
-       usb_hc_died(xhci_to_hcd(xhci)->primary_hcd);
+       usb_hc_died(xhci_to_hcd(xhci));
        xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
                        "xHCI host controller is dead.");
 }
@@ -1334,12 +1338,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 
        cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list);
 
-       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
-               xhci_err(xhci,
-                        "Command completion event does not match command\n");
-               return;
-       }
-
        del_timer(&xhci->cmd_timer);
 
        trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event);
@@ -1351,6 +1349,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                xhci_handle_stopped_cmd_ring(xhci, cmd);
                return;
        }
+
+       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
+               xhci_err(xhci,
+                        "Command completion event does not match command\n");
+               return;
+       }
+
        /*
         * Host aborted the command ring, check if the current command was
         * supposed to be aborted, otherwise continue normally.
@@ -3243,7 +3248,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
        send_addr = addr;
 
        /* Queue the TRBs, even if they are zero-length */
-       for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) {
+       for (enqd_len = 0; first_trb || enqd_len < full_len;
+                       enqd_len += trb_buff_len) {
                field = TRB_TYPE(TRB_NORMAL);
 
                /* TRB buffer should not cross 64KB boundaries */
index 52c27ca..9b5b3b2 100644 (file)
@@ -665,7 +665,7 @@ static ssize_t ftdi_elan_read(struct file *file, char __user *buffer,
 {
        char data[30 *3 + 4];
        char *d = data;
-       int m = (sizeof(data) - 1) / 3;
+       int m = (sizeof(data) - 1) / 3 - 1;
        int bytes_read = 0;
        int retry_on_empty = 10;
        int retry_on_timeout = 5;
@@ -1684,7 +1684,7 @@ wait:if (ftdi->disconnected > 0) {
                        int i = 0;
                        char data[30 *3 + 4];
                        char *d = data;
-                       int m = (sizeof(data) - 1) / 3;
+                       int m = (sizeof(data) - 1) / 3 - 1;
                        int l = 0;
                        struct u132_target *target = &ftdi->target[ed];
                        struct u132_command *command = &ftdi->command[
@@ -1876,7 +1876,7 @@ more:{
                if (packet_bytes > 2) {
                        char diag[30 *3 + 4];
                        char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                        char *b = ftdi->bulk_in_buffer;
                        int bytes_read = 0;
                        diag[0] = 0;
@@ -2053,7 +2053,7 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi)
                        if (packet_bytes > 2) {
                                char diag[30 *3 + 4];
                                char *d = diag;
-                               int m = (sizeof(diag) - 1) / 3;
+                               int m = (sizeof(diag) - 1) / 3 - 1;
                                char *b = ftdi->bulk_in_buffer;
                                int bytes_read = 0;
                                unsigned char c = 0;
@@ -2155,7 +2155,7 @@ more:{
                if (packet_bytes > 2) {
                        char diag[30 *3 + 4];
                        char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                        char *b = ftdi->bulk_in_buffer;
                        int bytes_read = 0;
                        diag[0] = 0;
index 6b978f0..5c8210d 100644 (file)
@@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req)
 {
        struct usb_sg_request   *req = (struct usb_sg_request *) _req;
 
-       req->status = -ETIMEDOUT;
        usb_sg_cancel(req);
 }
 
@@ -616,8 +615,10 @@ static int perform_sglist(
                mod_timer(&sg_timer, jiffies +
                                msecs_to_jiffies(SIMPLE_IO_TIMEOUT));
                usb_sg_wait(req);
-               del_timer_sync(&sg_timer);
-               retval = req->status;
+               if (!del_timer_sync(&sg_timer))
+                       retval = -ETIMEDOUT;
+               else
+                       retval = req->status;
 
                /* FIXME check resulting data pattern */
 
@@ -2602,7 +2603,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf)
        ktime_get_ts64(&start);
 
        retval = usbtest_do_ioctl(intf, param_32);
-       if (retval)
+       if (retval < 0)
                goto free_mutex;
 
        ktime_get_ts64(&end);
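
The timeout rework leans on del_timer_sync()'s return value: it returns nonzero
only when it deactivates a still-pending timer, so zero means the handler
already ran, i.e. the transfer timed out. That lets the timer callback stop
writing req->status itself, which raced with the completion path:

    static void sg_timeout(unsigned long _req)
    {
            struct usb_sg_request *req = (struct usb_sg_request *)_req;

            usb_sg_cancel(req);     /* no status write here: avoids the race */
    }

    /* waiter side */
    if (!del_timer_sync(&sg_timer))
            retval = -ETIMEDOUT;    /* timer fired and cancelled the request */
    else
            retval = req->status;   /* timer never fired: real status */
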
index 192248f..fe08e77 100644
@@ -290,6 +290,7 @@ int musb_hub_control(
        u32             temp;
        int             retval = 0;
        unsigned long   flags;
+       bool            start_musb = false;
 
        spin_lock_irqsave(&musb->lock, flags);
 
@@ -390,7 +391,7 @@ int musb_hub_control(
                         * logic relating to VBUS power-up.
                         */
                        if (!hcd->self.is_b_host && musb_has_gadget(musb))
-                               musb_start(musb);
+                               start_musb = true;
                        break;
                case USB_PORT_FEAT_RESET:
                        musb_port_reset(musb, true);
@@ -451,5 +452,9 @@ error:
                retval = -EPIPE;
        }
        spin_unlock_irqrestore(&musb->lock, flags);
+
+       if (start_musb)
+               musb_start(musb);
+
        return retval;
 }
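
musb_start() is moved out from under musb->lock by recording the decision in a
flag and acting on it after the unlock, the usual way to call something that
must not run under a spinlock from locked code. A sketch, with needs_start()
and device_start() as hypothetical stand-ins:

    bool start = false;
    unsigned long flags;

    spin_lock_irqsave(&dev->lock, flags);
    if (needs_start(dev))           /* decide under the lock */
            start = true;
    spin_unlock_irqrestore(&dev->lock, flags);

    if (start)
            device_start(dev);      /* act without the lock held */
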
index 980c9de..427efb5 100644
@@ -144,14 +144,18 @@ static irqreturn_t nop_gpio_vbus_thread(int irq, void *data)
 int usb_gen_phy_init(struct usb_phy *phy)
 {
        struct usb_phy_generic *nop = dev_get_drvdata(phy->dev);
+       int ret;
 
        if (!IS_ERR(nop->vcc)) {
                if (regulator_enable(nop->vcc))
                        dev_err(phy->dev, "Failed to enable power\n");
        }
 
-       if (!IS_ERR(nop->clk))
-               clk_prepare_enable(nop->clk);
+       if (!IS_ERR(nop->clk)) {
+               ret = clk_prepare_enable(nop->clk);
+               if (ret)
+                       return ret;
+       }
 
        nop_reset(nop);
 
index 6f6d2a7..6523af4 100644
@@ -140,6 +140,8 @@ static int omap_otg_probe(struct platform_device *pdev)
                 (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id,
                 otg_dev->vbus);
 
+       platform_set_drvdata(pdev, otg_dev);
+
        return 0;
 }
 
index 8fbbc2d..ac67bab 100644
@@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev)
        if (gpio > 0)
                dparam->enable_gpio = gpio;
 
-       if (dparam->type == USBHS_TYPE_RCAR_GEN2)
+       if (dparam->type == USBHS_TYPE_RCAR_GEN2 ||
+           dparam->type == USBHS_TYPE_RCAR_GEN3)
                dparam->has_usb_dmac = 1;
 
        return info;
index 280ed5f..857e783 100644
@@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done)
 
        /* use PIO if packet is less than pio_dma_border or pipe is DCP */
        if ((len < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                goto usbhsf_pio_prepare_push;
 
        /* check data length if this driver don't use USB-DMAC */
@@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
 
        /* use PIO if packet is less than pio_dma_border or pipe is DCP */
        if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                goto usbhsf_pio_prepare_pop;
 
        fifo = usbhsf_get_dma_fifo(priv, pkt);
index d4be5d5..28965ef 100644
@@ -282,9 +282,16 @@ static irqreturn_t usbhs_interrupt(int irq, void *data)
        if (usbhs_mod_is_host(priv))
                usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC);
 
-       usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
+       /*
+        * Clear these xxxSTS flags here, before the "call irq callback
+        * functions" section below: each "if" statement there may invoke a
+        * callback, and clearing the status afterwards could discard events
+        * raised by those callbacks (an unwanted side effect).
+        */
+       if (irq_state.intsts0 & BRDY)
+               usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
        usbhs_write(priv, NRDYSTS, ~irq_state.nrdysts);
-       usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
+       if (irq_state.intsts0 & BEMP)
+               usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
 
        /*
         * call irq callback functions
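
The hunk stops acknowledging BRDYSTS/BEMPSTS unconditionally: each status
register is cleared only when its summary bit in INTSTS0 is set, so events that
were not reported this time (and whose callbacks below therefore will not run)
stay pending. A generic sketch of the gate-then-ack idiom, with hypothetical
register names:

    /* ack only the events the summary register says actually fired */
    if (summary & IRQ_A)
            writel(~handled_a, base + A_STATUS);    /* write-to-clear */
    if (summary & IRQ_B)
            writel(~handled_b, base + B_STATUS);
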
index 50f3363..c4c6474 100644
@@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
                 * use dmaengine if possible.
                 * It will use pio handler if impossible.
                 */
-               if (usb_endpoint_dir_in(desc))
+               if (usb_endpoint_dir_in(desc)) {
                        pipe->handler = &usbhs_fifo_dma_push_handler;
-               else
+               } else {
                        pipe->handler = &usbhs_fifo_dma_pop_handler;
+                       usbhs_xxxsts_clear(priv, BRDYSTS,
+                                          usbhs_pipe_number(pipe));
+               }
 
                ret = 0;
        }
@@ -1073,7 +1076,7 @@ int usbhs_mod_gadget_probe(struct usbhs_priv *priv)
 
        gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED);
        dev_info(dev, "%stransceiver found\n",
-                gpriv->transceiver ? "" : "no ");
+                !IS_ERR(gpriv->transceiver) ? "" : "no ");
 
        /*
         * CAUTION
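
The log-message fix reflects usb_get_phy()'s contract: on failure it returns an
ERR_PTR() cookie such as ERR_PTR(-ENODEV), never NULL, so a plain truthiness
test always reports a transceiver as found:

    struct usb_phy *phy = usb_get_phy(USB_PHY_TYPE_UNDEFINED);

    if (IS_ERR(phy))                /* not "if (!phy)": never NULL */
            dev_info(dev, "no transceiver found\n");
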
index 0082080..b2d767e 100644
@@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) },
        { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
        { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
        { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
@@ -1008,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
+       { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
        { }                                     /* Terminating entry */
 };
 
index c5d6c1e..f87a938 100644
 #define FTDI_4N_GALAXY_DE_2_PID        0xF3C1
 #define FTDI_4N_GALAXY_DE_3_PID        0xF3C2
 
+/*
+ * Ivium Technologies product IDs
+ */
+#define FTDI_PALMSENS_PID      0xf440
+#define FTDI_IVIUM_XSTAT_PID   0xf441
+
 /*
  * Linx Technologies product ids
  */
 #define INTREPID_VALUECAN_PID  0x0601
 #define INTREPID_NEOVI_PID     0x0701
 
+/*
+ * WICED USB UART
+ */
+#define WICED_VID              0x0A5C
+#define WICED_USB20706V2_PID   0x6422
+
 /*
  * Definitions for ID TECH (www.idt-net.com) devices
  */
index 5608af4..de9992b 100644
@@ -1252,7 +1252,7 @@ static int mos7720_write(struct tty_struct *tty, struct usb_serial_port *port,
 
        if (urb->transfer_buffer == NULL) {
                urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
-                                              GFP_KERNEL);
+                                              GFP_ATOMIC);
                if (!urb->transfer_buffer)
                        goto exit;
        }
index ed378fb..57426d7 100644
@@ -1340,8 +1340,8 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port,
        }
 
        if (urb->transfer_buffer == NULL) {
-               urb->transfer_buffer =
-                   kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_KERNEL);
+               urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
+                                              GFP_ATOMIC);
                if (!urb->transfer_buffer)
                        goto exit;
        }
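
Both mos7720_write() and mos7840_write() can be entered in atomic context via
the tty layer, where a GFP_KERNEL allocation may sleep and trigger a
scheduling-while-atomic bug; GFP_ATOMIC makes the allocation non-blocking at
the cost of a higher failure rate, which the existing NULL check already
handles:

    if (urb->transfer_buffer == NULL) {
            urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
                                           GFP_ATOMIC); /* must not sleep */
            if (!urb->transfer_buffer)
                    goto exit;      /* GFP_ATOMIC fails more readily */
    }
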
index 8e07536..9894e34 100644
@@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4            0x1206
+#define TELIT_PRODUCT_LE920A4_1207             0x1207
+#define TELIT_PRODUCT_LE920A4_1208             0x1208
+#define TELIT_PRODUCT_LE920A4_1211             0x1211
+#define TELIT_PRODUCT_LE920A4_1212             0x1212
+#define TELIT_PRODUCT_LE920A4_1213             0x1213
+#define TELIT_PRODUCT_LE920A4_1214             0x1214
 
 /* ZTE PRODUCTS */
 #define ZTE_VENDOR_ID                          0x19d2
@@ -519,6 +525,12 @@ static void option_instat_callback(struct urb *urb);
 #define VIATELECOM_VENDOR_ID                   0x15eb
 #define VIATELECOM_PRODUCT_CDS7                        0x0001
 
+/* WeTelecom products */
+#define WETELECOM_VENDOR_ID                    0x22de
+#define WETELECOM_PRODUCT_WMD200               0x6801
+#define WETELECOM_PRODUCT_6802                 0x6802
+#define WETELECOM_PRODUCT_WMD300               0x6803
+
 struct option_blacklist_info {
        /* bitmask of interface numbers blacklisted for send_setup */
        const unsigned long sendsetup;
@@ -628,6 +640,11 @@ static const struct option_blacklist_info telit_le920_blacklist = {
        .reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le920a4_blacklist_1 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1),
+};
+
 static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
        .sendsetup = BIT(2),
        .reserved = BIT(0) | BIT(1) | BIT(3),
@@ -1203,6 +1220,16 @@ static const struct usb_device_id option_ids[] = {
                .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
                .driver_info = (kernel_ulong_t)&telit_le920_blacklist },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
                .driver_info = (kernel_ulong_t)&net_intf1_blacklist },
@@ -1966,9 +1993,13 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
        { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
        { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
        { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },                /* OLICARD300 - MT6225 */
        { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
        { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
+       { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) },
        { } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
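
The new Telit LE920A4 entries reuse the driver's bitmask convention: in a
struct option_blacklist_info, sendsetup marks interfaces that must not receive
the send_setup request and reserved marks interfaces the driver should leave
unbound (typically ADB or diagnostic ports). A hypothetical entry:

    static const struct option_blacklist_info example_blacklist = {
            .sendsetup = BIT(0),            /* no send_setup on interface 0 */
            .reserved  = BIT(1) | BIT(5),   /* leave interfaces 1, 5 unbound */
    };

    { USB_DEVICE(EXAMPLE_VID, EXAMPLE_PID),
      .driver_info = (kernel_ulong_t)&example_blacklist },
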
index b1b9bac..d213cf4 100644
@@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
 
        rc = usb_register(udriver);
        if (rc)
-               return rc;
+               goto failed_usb_register;
 
        for (sd = serial_drivers; *sd; ++sd) {
                (*sd)->usb_driver = udriver;
@@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
        while (sd-- > serial_drivers)
                usb_serial_deregister(*sd);
        usb_deregister(udriver);
+failed_usb_register:
+       kfree(udriver);
        return rc;
 }
 EXPORT_SYMBOL_GPL(usb_serial_register_drivers);
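
The goto converts an early return that leaked the udriver allocated earlier in
usb_serial_register_drivers() into the function's common unwind path: failures
after an allocation jump to a label placed past the success return, and labels
release resources in reverse order of acquisition. A compact sketch:

    static int register_example(void)
    {
            struct usb_driver *udriver;
            int rc;

            udriver = kzalloc(sizeof(*udriver), GFP_KERNEL);
            if (!udriver)
                    return -ENOMEM;                 /* nothing acquired yet */

            rc = usb_register(udriver);
            if (rc)
                    goto failed_usb_register;       /* was "return rc": leaked */

            return 0;

    failed_usb_register:
            kfree(udriver);
            return rc;
    }
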
index 9d6320e..6e29d05 100644
@@ -88,7 +88,7 @@ struct vhost_scsi_cmd {
        struct scatterlist *tvc_prot_sgl;
        struct page **tvc_upages;
        /* Pointer to response header iovec */
-       struct iovec *tvc_resp_iov;
+       struct iovec tvc_resp_iov;
        /* Pointer to vhost_scsi for our device */
        struct vhost_scsi *tvc_vhost;
        /* Pointer to vhost_virtqueue for the cmd */
@@ -547,7 +547,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
                memcpy(v_rsp.sense, cmd->tvc_sense_buf,
                       se_cmd->scsi_sense_length);
 
-               iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov,
+               iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov,
                              cmd->tvc_in_iovs, sizeof(v_rsp));
                ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
                if (likely(ret == sizeof(v_rsp))) {
@@ -1044,7 +1044,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                }
                cmd->tvc_vhost = vs;
                cmd->tvc_vq = vq;
-               cmd->tvc_resp_iov = &vq->iov[out];
+               cmd->tvc_resp_iov = vq->iov[out];
                cmd->tvc_in_iovs = in;
 
                pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
index e383ecd..ed9c9ee 100644
@@ -167,7 +167,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
  * making all of the arch DMA ops work on the vring device itself
  * is a mess.  For now, we use the parent device for DMA ops.
  */
-struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 {
        return vq->vq.vdev->dev.parent;
 }
index 7487971..c1010f0 100644
@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
                        rc = -ENOMEM;
                        goto out;
                }
-       } else {
+       } else if (msg_type == XS_TRANSACTION_END) {
                list_for_each_entry(trans, &u->transactions, list)
                        if (trans->handle.id == u->u.msg.tx_id)
                                break;
index 7ef637d..1e9d2f8 100644
@@ -461,8 +461,8 @@ static void afs_callback_updater(struct work_struct *work)
  */
 int __init afs_callback_update_init(void)
 {
-       afs_callback_update_worker =
-               create_singlethread_workqueue("kafs_callbackd");
+       afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
+                                                            WQ_MEM_RECLAIM);
        return afs_callback_update_worker ? 0 : -ENOMEM;
 }
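
This and the other create_singlethread_workqueue() conversions in the series
keep the ordering guarantee while adding memory-reclaim safety:
alloc_ordered_workqueue() runs at most one work item at a time in queueing
order, and WQ_MEM_RECLAIM gives the queue a rescuer thread so filesystem work
can still make progress under memory pressure. Sketch (the queue name is
illustrative):

    struct workqueue_struct *wq;

    wq = alloc_ordered_workqueue("kafs_example", WQ_MEM_RECLAIM);
    if (!wq)
            return -ENOMEM;
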
 
index 85737e9..2037e7a 100644
 #include "internal.h"
 #include "afs_cm.h"
 
-#if 0
-struct workqueue_struct *afs_cm_workqueue;
-#endif  /*  0  */
-
-static int afs_deliver_cb_init_call_back_state(struct afs_call *,
-                                              struct sk_buff *, bool);
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
-                                               struct sk_buff *, bool);
-static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
-                                                struct sk_buff *, bool);
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
 static void afs_cm_destructor(struct afs_call *);
 
 /*
@@ -134,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call)
         * received.  The step number here must match the final number in
         * afs_deliver_cb_callback().
         */
-       if (call->unmarshall == 6) {
+       if (call->unmarshall == 5) {
                ASSERT(call->server && call->count && call->request);
                afs_break_callbacks(call->server, call->count, call->request);
        }
@@ -168,27 +161,27 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 /*
  * deliver request data to a CB.CallBack call
  */
-static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
-                                  bool last)
+static int afs_deliver_cb_callback(struct afs_call *call)
 {
+       struct sockaddr_rxrpc srx;
        struct afs_callback *cb;
        struct afs_server *server;
-       struct in_addr addr;
        __be32 *bp;
        u32 tmp;
        int ret, loop;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
+               rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
                call->offset = 0;
                call->unmarshall++;
 
                /* extract the FID array and its count in two steps */
        case 1:
                _debug("extract FID count");
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -205,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
 
        case 2:
                _debug("extract FID array");
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      call->count * 3 * 4);
+               ret = afs_extract_data(call, call->buffer,
+                                      call->count * 3 * 4, true);
                if (ret < 0)
                        return ret;
 
@@ -232,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                /* extract the callback array and its count in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -242,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                        return -EBADMSG;
                call->offset = 0;
                call->unmarshall++;
-               if (tmp == 0)
-                       goto empty_cb_array;
 
        case 4:
                _debug("extract CB array");
-               ret = afs_extract_data(call, skb, last, call->request,
-                                      call->count * 3 * 4);
+               ret = afs_extract_data(call, call->buffer,
+                                      call->count * 3 * 4, false);
                if (ret < 0)
                        return ret;
 
@@ -261,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                        cb->type        = ntohl(*bp++);
                }
 
-       empty_cb_array:
                call->offset = 0;
                call->unmarshall++;
 
-       case 5:
-               ret = afs_data_complete(call, skb, last);
-               if (ret < 0)
-                       return ret;
-
                /* Record that the message was unmarshalled successfully so
                 * that the call destructor can know do the callback breaking
                 * work, even if the final ACK isn't received.
@@ -278,17 +263,15 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                 * updated also.
                 */
                call->unmarshall++;
-       case 6:
+       case 5:
                break;
        }
 
-
        call->state = AFS_CALL_REPLYING;
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-       server = afs_find_server(&addr);
+       server = afs_find_server(&srx);
        if (!server)
                return -ENOTCONN;
        call->server = server;
@@ -315,17 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 /*
  * deliver request data to a CB.InitCallBackState call
  */
-static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
-                                              struct sk_buff *skb,
-                                              bool last)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
+       struct sockaddr_rxrpc srx;
        struct afs_server *server;
-       struct in_addr addr;
        int ret;
 
-       _enter(",{%u},%d", skb->len, last);
+       _enter("");
+
+       rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
 
-       ret = afs_data_complete(call, skb, last);
+       ret = afs_extract_data(call, NULL, 0, false);
        if (ret < 0)
                return ret;
 
@@ -334,8 +317,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-       server = afs_find_server(&addr);
+       server = afs_find_server(&srx);
        if (!server)
                return -ENOTCONN;
        call->server = server;
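
With intercepted sk_buffs gone, the callback handlers can no longer read
ip_hdr(skb)->saddr; the caller's address now comes from the rxrpc core, and
afs_find_server() is changed to take a sockaddr_rxrpc. The lookup sequence
these handlers now share:

    struct sockaddr_rxrpc srx;
    struct afs_server *server;

    rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
    server = afs_find_server(&srx);
    if (!server)
            return -ENOTCONN;
    call->server = server;
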
@@ -348,27 +330,68 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
 /*
  * deliver request data to a CB.InitCallBackState3 call
  */
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
-                                               struct sk_buff *skb,
-                                               bool last)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
+       struct sockaddr_rxrpc srx;
        struct afs_server *server;
-       struct in_addr addr;
+       struct afs_uuid *r;
+       unsigned loop;
+       __be32 *b;
+       int ret;
+
+       _enter("");
+
+       rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+
+       _enter("{%u}", call->unmarshall);
 
-       _enter(",{%u},%d", skb->len, last);
+       switch (call->unmarshall) {
+       case 0:
+               call->offset = 0;
+               call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
+               if (!call->buffer)
+                       return -ENOMEM;
+               call->unmarshall++;
 
-       /* There are some arguments that we ignore */
-       afs_data_consumed(call, skb);
-       if (!last)
-               return -EAGAIN;
+       case 1:
+               _debug("extract UUID");
+               ret = afs_extract_data(call, call->buffer,
+                                      11 * sizeof(__be32), false);
+               switch (ret) {
+               case 0:         break;
+               case -EAGAIN:   return 0;
+               default:        return ret;
+               }
+
+               _debug("unmarshall UUID");
+               call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+               if (!call->request)
+                       return -ENOMEM;
+
+               b = call->buffer;
+               r = call->request;
+               r->time_low                     = ntohl(b[0]);
+               r->time_mid                     = ntohl(b[1]);
+               r->time_hi_and_version          = ntohl(b[2]);
+               r->clock_seq_hi_and_reserved    = ntohl(b[3]);
+               r->clock_seq_low                = ntohl(b[4]);
+
+               for (loop = 0; loop < 6; loop++)
+                       r->node[loop] = ntohl(b[loop + 5]);
+
+               call->offset = 0;
+               call->unmarshall++;
+
+       case 2:
+               break;
+       }
 
        /* no unmarshalling required */
        call->state = AFS_CALL_REPLYING;
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-       server = afs_find_server(&addr);
+       server = afs_find_server(&srx);
        if (!server)
                return -ENOTCONN;
        call->server = server;
@@ -393,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
 /*
  * deliver request data to a CB.Probe call
  */
-static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
-                               bool last)
+static int afs_deliver_cb_probe(struct afs_call *call)
 {
        int ret;
 
-       _enter(",{%u},%d", skb->len, last);
+       _enter("");
 
-       ret = afs_data_complete(call, skb, last);
+       ret = afs_extract_data(call, NULL, 0, false);
        if (ret < 0)
                return ret;
 
@@ -426,7 +448,6 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 
        _enter("");
 
-
        if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
                reply.match = htonl(0);
        else
@@ -439,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 /*
  * deliver request data to a CB.ProbeUuid call
  */
-static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
-                                    bool last)
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 {
        struct afs_uuid *r;
        unsigned loop;
        __be32 *b;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
-       ret = afs_data_complete(call, skb, last);
-       if (ret < 0)
-               return ret;
+       _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
@@ -463,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      11 * sizeof(__be32));
+               ret = afs_extract_data(call, call->buffer,
+                                      11 * sizeof(__be32), false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -491,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
                call->unmarshall++;
 
        case 2:
-               _debug("trailer");
-               if (skb->len != 0)
-                       return -EBADMSG;
                break;
        }
 
-       ret = afs_data_complete(call, skb, last);
-       if (ret < 0)
-               return ret;
-
        call->state = AFS_CALL_REPLYING;
 
        INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
@@ -574,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
 /*
  * deliver request data to a CB.TellMeAboutYourself call
  */
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
-                                                struct sk_buff *skb, bool last)
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 {
        int ret;
 
-       _enter(",{%u},%d", skb->len, last);
+       _enter("");
 
-       ret = afs_data_complete(call, skb, last);
+       ret = afs_extract_data(call, NULL, 0, false);
        if (ret < 0)
                return ret;
 
index d91a9c9..3191dff 100644
@@ -36,8 +36,8 @@ static int afs_init_lock_manager(void)
        if (!afs_lock_manager) {
                mutex_lock(&afs_lock_manager_mutex);
                if (!afs_lock_manager) {
-                       afs_lock_manager =
-                               create_singlethread_workqueue("kafs_lockd");
+                       afs_lock_manager = alloc_workqueue("kafs_lockd",
+                                                          WQ_MEM_RECLAIM, 0);
                        if (!afs_lock_manager)
                                ret = -ENOMEM;
                }
index 9312b92..96f4d76 100644
@@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
 /*
  * deliver reply data to an FS.FetchStatus
  */
-static int afs_deliver_fs_fetch_status(struct afs_call *call,
-                                      struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter(",,%u", last);
+       _enter("");
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 /*
  * deliver reply data to an FS.FetchData
  */
-static int afs_deliver_fs_fetch_data(struct afs_call *call,
-                                    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
@@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
        void *buffer;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
@@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                 * client) */
        case 1:
                _debug("extract data length (MSW)");
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                /* extract the returned data length */
        case 2:
                _debug("extract data length");
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                _debug("extract data");
                if (call->count > 0) {
                        page = call->reply3;
-                       buffer = kmap_atomic(page);
-                       ret = afs_extract_data(call, skb, last, buffer,
-                                              call->count);
-                       kunmap_atomic(buffer);
+                       buffer = kmap(page);
+                       ret = afs_extract_data(call, buffer,
+                                              call->count, true);
+                       kunmap(page);
                        if (ret < 0)
                                return ret;
                }
@@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 
                /* extract the metadata */
        case 4:
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      (21 + 3 + 6) * 4);
+               ret = afs_extract_data(call, call->buffer,
+                                      (21 + 3 + 6) * 4, false);
                if (ret < 0)
                        return ret;
 
@@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                call->unmarshall++;
 
        case 5:
-               ret = afs_data_complete(call, skb, last);
-               if (ret < 0)
-                       return ret;
                break;
        }
 
        if (call->count < PAGE_SIZE) {
                _debug("clear");
                page = call->reply3;
-               buffer = kmap_atomic(page);
+               buffer = kmap(page);
                memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-               kunmap_atomic(buffer);
+               kunmap(page);
        }
 
        _leave(" = 0 [done]");
@@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server,
 /*
  * deliver reply data to an FS.GiveUpCallBacks
  */
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
-                                           struct sk_buff *skb, bool last)
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
 {
-       _enter(",{%u},%d", skb->len, last);
+       _enter("");
 
        /* shouldn't be any reply data */
-       return afs_data_complete(call, skb, last);
+       return afs_extract_data(call, NULL, 0, false);
 }
 
 /*
@@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
 /*
  * deliver reply data to an FS.CreateFile or an FS.MakeDir
  */
-static int afs_deliver_fs_create_vnode(struct afs_call *call,
-                                      struct sk_buff *skb, bool last)
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server,
 /*
  * deliver reply data to an FS.RemoveFile or FS.RemoveDir
  */
-static int afs_deliver_fs_remove(struct afs_call *call,
-                                struct sk_buff *skb, bool last)
+static int afs_deliver_fs_remove(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server,
 /*
  * deliver reply data to an FS.Link
  */
-static int afs_deliver_fs_link(struct afs_call *call,
-                              struct sk_buff *skb, bool last)
+static int afs_deliver_fs_link(struct afs_call *call)
 {
        struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server,
 /*
  * deliver reply data to an FS.Symlink
  */
-static int afs_deliver_fs_symlink(struct afs_call *call,
-                                 struct sk_buff *skb, bool last)
+static int afs_deliver_fs_symlink(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server,
 /*
  * deliver reply data to an FS.Rename
  */
-static int afs_deliver_fs_rename(struct afs_call *call,
-                                 struct sk_buff *skb, bool last)
+static int afs_deliver_fs_rename(struct afs_call *call)
 {
        struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server,
 /*
  * deliver reply data to an FS.StoreData
  */
-static int afs_deliver_fs_store_data(struct afs_call *call,
-                                    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_data(struct afs_call *call)
 {
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter(",,%u", last);
+       _enter("");
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
 /*
  * deliver reply data to an FS.StoreStatus
  */
-static int afs_deliver_fs_store_status(struct afs_call *call,
-                                      struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_status(struct afs_call *call)
 {
        afs_dataversion_t *store_version;
        struct afs_vnode *vnode = call->reply;
        const __be32 *bp;
        int ret;
 
-       _enter(",,%u", last);
+       _enter("");
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
@@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
 /*
  * deliver reply data to an FS.GetVolumeStatus
  */
-static int afs_deliver_fs_get_volume_status(struct afs_call *call,
-                                           struct sk_buff *skb, bool last)
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
        const __be32 *bp;
        char *p;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
@@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                /* extract the returned status record */
        case 1:
                _debug("extract status");
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      12 * 4);
+               ret = afs_extract_data(call, call->buffer,
+                                      12 * 4, true);
                if (ret < 0)
                        return ret;
 
@@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 
                /* extract the volume name length */
        case 2:
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 3:
                _debug("extract volname");
                if (call->count > 0) {
-                       ret = afs_extract_data(call, skb, last, call->reply3,
-                                              call->count);
+                       ret = afs_extract_data(call, call->reply3,
+                                              call->count, true);
                        if (ret < 0)
                                return ret;
                }
@@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                call->count = 4 - (call->count & 3);
 
        case 4:
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      call->count);
+               ret = afs_extract_data(call, call->buffer,
+                                      call->count, true);
                if (ret < 0)
                        return ret;
 
@@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 
                /* extract the offline message length */
        case 5:
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 6:
                _debug("extract offline");
                if (call->count > 0) {
-                       ret = afs_extract_data(call, skb, last, call->reply3,
-                                              call->count);
+                       ret = afs_extract_data(call, call->reply3,
+                                              call->count, true);
                        if (ret < 0)
                                return ret;
                }
@@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                call->count = 4 - (call->count & 3);
 
        case 7:
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      call->count);
+               ret = afs_extract_data(call, call->buffer,
+                                      call->count, true);
                if (ret < 0)
                        return ret;
 
@@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
 
                /* extract the message of the day length */
        case 8:
-               ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+               ret = afs_extract_data(call, &call->tmp, 4, true);
                if (ret < 0)
                        return ret;
 
@@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
        case 9:
                _debug("extract motd");
                if (call->count > 0) {
-                       ret = afs_extract_data(call, skb, last, call->reply3,
-                                              call->count);
+                       ret = afs_extract_data(call, call->reply3,
+                                              call->count, true);
                        if (ret < 0)
                                return ret;
                }
@@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                call->unmarshall++;
 
                /* extract the message of the day padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_motd_padding;
-               }
-               call->count = 4 - (call->count & 3);
+               call->count = (4 - (call->count & 3)) & 3;
 
        case 10:
-               ret = afs_extract_data(call, skb, last, call->buffer,
-                                      call->count);
+               ret = afs_extract_data(call, call->buffer,
+                                      call->count, false);
                if (ret < 0)
                        return ret;
 
                call->offset = 0;
                call->unmarshall++;
-       no_motd_padding:
-
        case 11:
-               ret = afs_data_complete(call, skb, last);
-               if (ret < 0)
-                       return ret;
                break;
        }
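
The padding computation folds the old "already aligned" special case into one
expression: XDR pads opaque data to a 4-byte boundary, and the final "& 3" maps
an aligned length to 0 pad bytes instead of 4, which the removed goto used to
handle:

    unsigned int pad = (4 - (count & 3)) & 3;
    /* count % 4 == 0 -> 0 (not 4); 1 -> 3; 2 -> 2; 3 -> 1 */
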
 
@@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server,
 /*
  * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
  */
-static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
-                                   struct sk_buff *skb, bool last)
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
 {
        const __be32 *bp;
        int ret;
 
-       _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+       _enter("{%u}", call->unmarshall);
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
index df976b2..5497c84 100644
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/skbuff.h>
 #include <linux/rxrpc.h>
 #include <linux/key.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
 #include <linux/backing-dev.h>
+#include <net/af_rxrpc.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -56,7 +56,7 @@ struct afs_mount_params {
  */
 struct afs_wait_mode {
        /* RxRPC received message notification */
-       void (*rx_wakeup)(struct afs_call *call);
+       rxrpc_notify_rx_t notify_rx;
 
        /* synchronous call waiter and call dispatched notification */
        int (*wait)(struct afs_call *call);
@@ -75,10 +75,8 @@ struct afs_call {
        const struct afs_call_type *type;       /* type of call */
        const struct afs_wait_mode *wait_mode;  /* completion wait mode */
        wait_queue_head_t       waitq;          /* processes awaiting completion */
-       void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
        struct work_struct      async_work;     /* asynchronous work processor */
        struct work_struct      work;           /* actual work processor */
-       struct sk_buff_head     rx_queue;       /* received packets */
        struct rxrpc_call       *rxcall;        /* RxRPC call handle */
        struct key              *key;           /* security for this call */
        struct afs_server       *server;        /* server affected by incoming CM call */
@@ -92,6 +90,7 @@ struct afs_call {
        void                    *reply4;        /* reply buffer (fourth part) */
        pgoff_t                 first;          /* first page in mapping to deal with */
        pgoff_t                 last;           /* last page in mapping to deal with */
+       size_t                  offset;         /* offset into received data store */
        enum {                                  /* call state */
                AFS_CALL_REQUESTING,    /* request is being sent for outgoing call */
                AFS_CALL_AWAIT_REPLY,   /* awaiting reply to outgoing call */
@@ -99,21 +98,18 @@ struct afs_call {
                AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
                AFS_CALL_REPLYING,      /* replying to incoming call */
                AFS_CALL_AWAIT_ACK,     /* awaiting final ACK of incoming call */
-               AFS_CALL_COMPLETE,      /* successfully completed */
-               AFS_CALL_BUSY,          /* server was busy */
-               AFS_CALL_ABORTED,       /* call was aborted */
-               AFS_CALL_ERROR,         /* call failed due to error */
+               AFS_CALL_COMPLETE,      /* Completed or failed */
        }                       state;
        int                     error;          /* error code */
+       u32                     abort_code;     /* Remote abort ID or 0 */
        unsigned                request_size;   /* size of request data */
        unsigned                reply_max;      /* maximum size of reply */
-       unsigned                reply_size;     /* current size of reply */
        unsigned                first_offset;   /* offset into mapping[first] */
        unsigned                last_to;        /* amount of mapping[last] */
-       unsigned                offset;         /* offset into received data store */
        unsigned char           unmarshall;     /* unmarshalling phase */
        bool                    incoming;       /* T if incoming call */
        bool                    send_pages;     /* T if data from mapping should be sent */
+       bool                    need_attention; /* T if RxRPC poked us */
        u16                     service_id;     /* RxRPC service ID to call */
        __be16                  port;           /* target UDP port */
        __be32                  operation_ID;   /* operation ID for an incoming call */
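
The enum shrink replaces four mutually exclusive end states (COMPLETE, BUSY,
ABORTED, ERROR) with a single AFS_CALL_COMPLETE plus side-band fields: error
carries the local errno and abort_code the remote abort ID. A sketch of how a
consumer distinguishes the outcomes, assuming the abort_to_error hook shown
below:

    if (call->state == AFS_CALL_COMPLETE) {
            if (call->abort_code)
                    ret = call->type->abort_to_error(call->abort_code);
            else
                    ret = call->error;      /* 0 on clean completion */
    }
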
@@ -128,8 +124,7 @@ struct afs_call_type {
        /* deliver request or reply data to a call
         * - returning an error will cause the call to be aborted
         */
-       int (*deliver)(struct afs_call *call, struct sk_buff *skb,
-                      bool last);
+       int (*deliver)(struct afs_call *call);
 
        /* map an abort code to an error number */
        int (*abort_to_error)(u32 abort_code);
@@ -607,29 +602,22 @@ extern void afs_proc_cell_remove(struct afs_cell *);
 /*
  * rxrpc.c
  */
+extern struct socket *afs_socket;
+
 extern int afs_open_socket(void);
 extern void afs_close_socket(void);
-extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
 extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
                         const struct afs_wait_mode *);
 extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
                                            size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
-extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
-                           size_t);
+extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
 
-static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
-                                   bool last)
+static inline int afs_transfer_reply(struct afs_call *call)
 {
-       if (skb->len > 0)
-               return -EBADMSG;
-       afs_data_consumed(call, skb);
-       if (!last)
-               return -EAGAIN;
-       return 0;
+       return afs_extract_data(call, call->buffer, call->reply_max, false);
 }
 
 /*
@@ -654,7 +642,7 @@ do {                                                                \
 
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
                                            const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
 extern void afs_put_server(struct afs_server *);
 extern void __exit afs_purge_servers(void);
 
index 35de0c0..0b187ef 100644
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include "internal.h"
 
 MODULE_DESCRIPTION("AFS Client File System");
index 14d04c8..59bdaa7 100644
 #include "internal.h"
 #include "afs_cm.h"
 
-static struct socket *afs_socket; /* my RxRPC socket */
+struct socket *afs_socket; /* my RxRPC socket */
 static struct workqueue_struct *afs_async_calls;
+static struct afs_call *afs_spare_incoming_call;
 static atomic_t afs_outstanding_calls;
-static atomic_t afs_outstanding_skbs;
 
-static void afs_wake_up_call_waiter(struct afs_call *);
+static void afs_free_call(struct afs_call *);
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
 static int afs_wait_for_call_to_complete(struct afs_call *);
-static void afs_wake_up_async_call(struct afs_call *);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct afs_call *);
-static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
-static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static int afs_deliver_cm_op_id(struct afs_call *);
 
 /* synchronous call management */
 const struct afs_wait_mode afs_sync_call = {
-       .rx_wakeup      = afs_wake_up_call_waiter,
+       .notify_rx      = afs_wake_up_call_waiter,
        .wait           = afs_wait_for_call_to_complete,
 };
 
 /* asynchronous call management */
 const struct afs_wait_mode afs_async_call = {
-       .rx_wakeup      = afs_wake_up_async_call,
+       .notify_rx      = afs_wake_up_async_call,
        .wait           = afs_dont_wait_for_call_to_complete,
 };
 
 /* asynchronous incoming call management */
 static const struct afs_wait_mode afs_async_incoming_call = {
-       .rx_wakeup      = afs_wake_up_async_call,
+       .notify_rx      = afs_wake_up_async_call,
 };
 
 /* asynchronous incoming call initial processing */
@@ -53,17 +55,9 @@ static const struct afs_call_type afs_RXCMxxxx = {
        .abort_to_error = afs_abort_to_error,
 };
 
-static void afs_collect_incoming_call(struct work_struct *);
+static void afs_charge_preallocation(struct work_struct *);
 
-static struct sk_buff_head afs_incoming_calls;
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
-
-static void afs_async_workfn(struct work_struct *work)
-{
-       struct afs_call *call = container_of(work, struct afs_call, async_work);
-
-       call->async_workfn(call);
-}
+static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
 
 static int afs_wait_atomic_t(atomic_t *p)
 {
@@ -83,10 +77,8 @@ int afs_open_socket(void)
 
        _enter("");
 
-       skb_queue_head_init(&afs_incoming_calls);
-
        ret = -ENOMEM;
-       afs_async_calls = create_singlethread_workqueue("kafsd");
+       afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
        if (!afs_async_calls)
                goto error_0;
 
@@ -110,13 +102,15 @@ int afs_open_socket(void)
        if (ret < 0)
                goto error_2;
 
+       rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+                                          afs_rx_discard_new_call);
+
        ret = kernel_listen(socket, INT_MAX);
        if (ret < 0)
                goto error_2;
 
-       rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
-
        afs_socket = socket;
+       afs_charge_preallocation(NULL);
        _leave(" = 0");
        return 0;
 
@@ -136,51 +130,27 @@ void afs_close_socket(void)
 {
        _enter("");
 
+       if (afs_spare_incoming_call) {
+               atomic_inc(&afs_outstanding_calls);
+               afs_free_call(afs_spare_incoming_call);
+               afs_spare_incoming_call = NULL;
+       }
+
+       _debug("outstanding %u", atomic_read(&afs_outstanding_calls));
        wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
                         TASK_UNINTERRUPTIBLE);
        _debug("no outstanding calls");
 
+       flush_workqueue(afs_async_calls);
+       kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+       flush_workqueue(afs_async_calls);
        sock_release(afs_socket);
 
        _debug("dework");
        destroy_workqueue(afs_async_calls);
-
-       ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
        _leave("");
 }
 
-/*
- * Note that the data in a socket buffer is now consumed.
- */
-void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
-{
-       if (!skb) {
-               _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
-               dump_stack();
-       } else {
-               _debug("DLVR %p{%u} [%d]",
-                      skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-               rxrpc_kernel_data_consumed(call->rxcall, skb);
-       }
-}
-
-/*
- * free a socket buffer
- */
-static void afs_free_skb(struct sk_buff *skb)
-{
-       if (!skb) {
-               _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
-               dump_stack();
-       } else {
-               _debug("FREE %p{%u} [%d]",
-                      skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-               if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-                       BUG();
-               rxrpc_kernel_free_skb(skb);
-       }
-}
-
 /*
  * free a call
  */
@@ -191,7 +161,6 @@ static void afs_free_call(struct afs_call *call)
 
        ASSERTCMP(call->rxcall, ==, NULL);
        ASSERT(!work_pending(&call->async_work));
-       ASSERT(skb_queue_empty(&call->rx_queue));
        ASSERT(call->type->name != NULL);
 
        kfree(call->request);
@@ -207,7 +176,7 @@ static void afs_free_call(struct afs_call *call)
 static void afs_end_call_nofree(struct afs_call *call)
 {
        if (call->rxcall) {
-               rxrpc_kernel_end_call(call->rxcall);
+               rxrpc_kernel_end_call(afs_socket, call->rxcall);
                call->rxcall = NULL;
        }
        if (call->type->destructor)
@@ -227,7 +196,7 @@ static void afs_end_call(struct afs_call *call)
  * allocate a call with flat request and reply buffers
  */
 struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
-                                    size_t request_size, size_t reply_size)
+                                    size_t request_size, size_t reply_max)
 {
        struct afs_call *call;
 
@@ -241,7 +210,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
 
        call->type = type;
        call->request_size = request_size;
-       call->reply_max = reply_size;
+       call->reply_max = reply_max;
 
        if (request_size) {
                call->request = kmalloc(request_size, GFP_NOFS);
@@ -249,14 +218,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
                        goto nomem_free;
        }
 
-       if (reply_size) {
-               call->buffer = kmalloc(reply_size, GFP_NOFS);
+       if (reply_max) {
+               call->buffer = kmalloc(reply_max, GFP_NOFS);
                if (!call->buffer)
                        goto nomem_free;
        }
 
        init_waitqueue_head(&call->waitq);
-       skb_queue_head_init(&call->rx_queue);
        return call;
 
 nomem_free:
@@ -325,8 +293,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
                         * returns from sending the request */
                        if (first + loop >= last)
                                call->state = AFS_CALL_AWAIT_REPLY;
-                       ret = rxrpc_kernel_send_data(call->rxcall, msg,
-                                                    to - offset);
+                       ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+                                                    msg, to - offset);
                        kunmap(pages[loop]);
                        if (ret < 0)
                                break;
@@ -354,7 +322,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        struct msghdr msg;
        struct kvec iov[1];
        int ret;
-       struct sk_buff *skb;
 
        _enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
@@ -366,8 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
               atomic_read(&afs_outstanding_calls));
 
        call->wait_mode = wait_mode;
-       call->async_workfn = afs_process_async_call;
-       INIT_WORK(&call->async_work, afs_async_workfn);
+       INIT_WORK(&call->async_work, afs_process_async_call);
 
        memset(&srx, 0, sizeof(srx));
        srx.srx_family = AF_RXRPC;
@@ -380,7 +346,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
        /* create a call */
        rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
-                                        (unsigned long) call, gfp);
+                                        (unsigned long) call, gfp,
+                                        wait_mode->notify_rx);
        call->key = NULL;
        if (IS_ERR(rxcall)) {
                ret = PTR_ERR(rxcall);
@@ -406,7 +373,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
         * request */
        if (!call->send_pages)
                call->state = AFS_CALL_AWAIT_REPLY;
-       ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+       ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+                                    &msg, call->request_size);
        if (ret < 0)
                goto error_do_abort;
 
@@ -421,150 +389,85 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
        return wait_mode->wait(call);
 
 error_do_abort:
-       rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
-       while ((skb = skb_dequeue(&call->rx_queue)))
-               afs_free_skb(skb);
+       rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
 error_kill_call:
        afs_end_call(call);
        _leave(" = %d", ret);
        return ret;
 }
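
Note the reworked abort primitive used in the error path above: rxrpc_kernel_abort_call() now takes the socket, the call, the abort code to send on the wire, a positive local error value, and a short string ("KSD" above) tagging the abort site for tracing. A hedged caller sketch; example_abort and the "KXX" tag are illustrative, not from the patch:

    /* Illustrative only: complete a call locally while aborting it on
     * the wire, tagging the abort site as "KXX" for tracing. */
    static void example_abort(struct afs_call *call, int error)
    {
            rxrpc_kernel_abort_call(afs_socket, call->rxcall,
                                    RX_USER_ABORT, abs(error), "KXX");
            call->error = -abs(error);
            call->state = AFS_CALL_COMPLETE;
    }
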
 
-/*
- * Handles intercepted messages that were arriving in the socket's Rx queue.
- *
- * Called from the AF_RXRPC call processor in waitqueue process context.  For
- * each call, it is guaranteed this will be called in order of packet to be
- * delivered.
- */
-static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
-                              struct sk_buff *skb)
-{
-       struct afs_call *call = (struct afs_call *) user_call_ID;
-
-       _enter("%p,,%u", call, skb->mark);
-
-       _debug("ICPT %p{%u} [%d]",
-              skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-
-       ASSERTCMP(sk, ==, afs_socket->sk);
-       atomic_inc(&afs_outstanding_skbs);
-
-       if (!call) {
-               /* its an incoming call for our callback service */
-               skb_queue_tail(&afs_incoming_calls, skb);
-               queue_work(afs_wq, &afs_collect_incoming_call_work);
-       } else {
-               /* route the messages directly to the appropriate call */
-               skb_queue_tail(&call->rx_queue, skb);
-               call->wait_mode->rx_wakeup(call);
-       }
-
-       _leave("");
-}
-
 /*
  * deliver messages to a call
  */
 static void afs_deliver_to_call(struct afs_call *call)
 {
-       struct sk_buff *skb;
-       bool last;
        u32 abort_code;
        int ret;
 
-       _enter("");
-
-       while ((call->state == AFS_CALL_AWAIT_REPLY ||
-               call->state == AFS_CALL_AWAIT_OP_ID ||
-               call->state == AFS_CALL_AWAIT_REQUEST ||
-               call->state == AFS_CALL_AWAIT_ACK) &&
-              (skb = skb_dequeue(&call->rx_queue))) {
-               switch (skb->mark) {
-               case RXRPC_SKB_MARK_DATA:
-                       _debug("Rcv DATA");
-                       last = rxrpc_kernel_is_data_last(skb);
-                       ret = call->type->deliver(call, skb, last);
-                       switch (ret) {
-                       case -EAGAIN:
-                               if (last) {
-                                       _debug("short data");
-                                       goto unmarshal_error;
-                               }
-                               break;
-                       case 0:
-                               ASSERT(last);
-                               if (call->state == AFS_CALL_AWAIT_REPLY)
-                                       call->state = AFS_CALL_COMPLETE;
-                               break;
-                       case -ENOTCONN:
-                               abort_code = RX_CALL_DEAD;
-                               goto do_abort;
-                       case -ENOTSUPP:
-                               abort_code = RX_INVALID_OPERATION;
-                               goto do_abort;
-                       default:
-                       unmarshal_error:
-                               abort_code = RXGEN_CC_UNMARSHAL;
-                               if (call->state != AFS_CALL_AWAIT_REPLY)
-                                       abort_code = RXGEN_SS_UNMARSHAL;
-                       do_abort:
-                               rxrpc_kernel_abort_call(call->rxcall,
-                                                       abort_code);
-                               call->error = ret;
-                               call->state = AFS_CALL_ERROR;
-                               break;
+       _enter("%s", call->type->name);
+
+       while (call->state == AFS_CALL_AWAIT_REPLY ||
+              call->state == AFS_CALL_AWAIT_OP_ID ||
+              call->state == AFS_CALL_AWAIT_REQUEST ||
+              call->state == AFS_CALL_AWAIT_ACK
+              ) {
+               if (call->state == AFS_CALL_AWAIT_ACK) {
+                       size_t offset = 0;
+                       ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+                                                    NULL, 0, &offset, false,
+                                                    &call->abort_code);
+                       if (ret == -EINPROGRESS || ret == -EAGAIN)
+                               return;
+                       if (ret == 1) {
+                               call->state = AFS_CALL_COMPLETE;
+                               goto done;
                        }
-                       break;
-               case RXRPC_SKB_MARK_FINAL_ACK:
-                       _debug("Rcv ACK");
-                       call->state = AFS_CALL_COMPLETE;
-                       break;
-               case RXRPC_SKB_MARK_BUSY:
-                       _debug("Rcv BUSY");
-                       call->error = -EBUSY;
-                       call->state = AFS_CALL_BUSY;
-                       break;
-               case RXRPC_SKB_MARK_REMOTE_ABORT:
-                       abort_code = rxrpc_kernel_get_abort_code(skb);
-                       call->error = call->type->abort_to_error(abort_code);
-                       call->state = AFS_CALL_ABORTED;
-                       _debug("Rcv ABORT %u -> %d", abort_code, call->error);
-                       break;
-               case RXRPC_SKB_MARK_LOCAL_ABORT:
-                       abort_code = rxrpc_kernel_get_abort_code(skb);
-                       call->error = call->type->abort_to_error(abort_code);
-                       call->state = AFS_CALL_ABORTED;
-                       _debug("Loc ABORT %u -> %d", abort_code, call->error);
-                       break;
-               case RXRPC_SKB_MARK_NET_ERROR:
-                       call->error = -rxrpc_kernel_get_error_number(skb);
-                       call->state = AFS_CALL_ERROR;
-                       _debug("Rcv NET ERROR %d", call->error);
-                       break;
-               case RXRPC_SKB_MARK_LOCAL_ERROR:
-                       call->error = -rxrpc_kernel_get_error_number(skb);
-                       call->state = AFS_CALL_ERROR;
-                       _debug("Rcv LOCAL ERROR %d", call->error);
-                       break;
-               default:
-                       BUG();
-                       break;
+                       return;
                }
 
-               afs_free_skb(skb);
-       }
-
-       /* make sure the queue is empty if the call is done with (we might have
-        * aborted the call early because of an unmarshalling error) */
-       if (call->state >= AFS_CALL_COMPLETE) {
-               while ((skb = skb_dequeue(&call->rx_queue)))
-                       afs_free_skb(skb);
-               if (call->incoming)
-                       afs_end_call(call);
+               ret = call->type->deliver(call);
+               switch (ret) {
+               case 0:
+                       if (call->state == AFS_CALL_AWAIT_REPLY)
+                               call->state = AFS_CALL_COMPLETE;
+                       goto done;
+               case -EINPROGRESS:
+               case -EAGAIN:
+                       goto out;
+               case -ENOTCONN:
+                       abort_code = RX_CALL_DEAD;
+                       rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                               abort_code, -ret, "KNC");
+                       goto do_abort;
+               case -ENOTSUPP:
+                       abort_code = RX_INVALID_OPERATION;
+                       rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                               abort_code, -ret, "KIV");
+                       goto do_abort;
+               case -ENODATA:
+               case -EBADMSG:
+               case -EMSGSIZE:
+               default:
+                       abort_code = RXGEN_CC_UNMARSHAL;
+                       if (call->state != AFS_CALL_AWAIT_REPLY)
+                               abort_code = RXGEN_SS_UNMARSHAL;
+                       rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                               abort_code, EBADMSG, "KUM");
+                       goto do_abort;
+               }
        }
 
+done:
+       if (call->state == AFS_CALL_COMPLETE && call->incoming)
+               afs_end_call(call);
+out:
        _leave("");
+       return;
+
+do_abort:
+       call->error = ret;
+       call->state = AFS_CALL_COMPLETE;
+       goto done;
 }
 
 /*
@@ -572,7 +475,7 @@ static void afs_deliver_to_call(struct afs_call *call)
  */
 static int afs_wait_for_call_to_complete(struct afs_call *call)
 {
-       struct sk_buff *skb;
+       const char *abort_why;
        int ret;
 
        DECLARE_WAITQUEUE(myself, current);
@@ -584,15 +487,18 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
                set_current_state(TASK_INTERRUPTIBLE);
 
                /* deliver any messages that are in the queue */
-               if (!skb_queue_empty(&call->rx_queue)) {
+               if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+                       call->need_attention = false;
                        __set_current_state(TASK_RUNNING);
                        afs_deliver_to_call(call);
                        continue;
                }
 
+               abort_why = "KWC";
                ret = call->error;
-               if (call->state >= AFS_CALL_COMPLETE)
+               if (call->state == AFS_CALL_COMPLETE)
                        break;
+               abort_why = "KWI";
                ret = -EINTR;
                if (signal_pending(current))
                        break;
@@ -605,9 +511,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
        /* kill the call */
        if (call->state < AFS_CALL_COMPLETE) {
                _debug("call incomplete");
-               rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
-               while ((skb = skb_dequeue(&call->rx_queue)))
-                       afs_free_skb(skb);
+               rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                       RX_CALL_DEAD, -ret, abort_why);
        }
 
        _debug("call complete");
@@ -619,17 +524,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 /*
  * wake up a waiting call
  */
-static void afs_wake_up_call_waiter(struct afs_call *call)
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+                                   unsigned long call_user_ID)
 {
+       struct afs_call *call = (struct afs_call *)call_user_ID;
+
+       call->need_attention = true;
        wake_up(&call->waitq);
 }
 
 /*
  * wake up an asynchronous call
  */
-static void afs_wake_up_async_call(struct afs_call *call)
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+                                  unsigned long call_user_ID)
 {
-       _enter("");
+       struct afs_call *call = (struct afs_call *)call_user_ID;
+
+       call->need_attention = true;
        queue_work(afs_async_calls, &call->async_work);
 }
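
Both wake-up paths share one handshake with the delivery side: the rxrpc notification (potentially softirq context) sets need_attention and then wakes or queues the consumer, and the consumer clears the flag before delivering, so a notification that races with delivery is never lost, only re-processed. Distilled, using only names from this patch:

    /* producer side, called from rxrpc when the call needs attention */
    call->need_attention = true;
    wake_up(&call->waitq);          /* or queue_work() for async calls */

    /* consumer side, process context: clear before delivering so a
     * concurrent notification forces another pass */
    if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
            call->need_attention = false;
            afs_deliver_to_call(call);
    }
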
 
@@ -647,8 +559,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
 /*
  * delete an asynchronous call
  */
-static void afs_delete_async_call(struct afs_call *call)
+static void afs_delete_async_call(struct work_struct *work)
 {
+       struct afs_call *call = container_of(work, struct afs_call, async_work);
+
        _enter("");
 
        afs_free_call(call);
@@ -658,17 +572,19 @@ static void afs_delete_async_call(struct afs_call *call)
 
 /*
  * perform processing on an asynchronous call
- * - on a multiple-thread workqueue this work item may try to run on several
- *   CPUs at the same time
  */
-static void afs_process_async_call(struct afs_call *call)
+static void afs_process_async_call(struct work_struct *work)
 {
+       struct afs_call *call = container_of(work, struct afs_call, async_work);
+
        _enter("");
 
-       if (!skb_queue_empty(&call->rx_queue))
+       if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+               call->need_attention = false;
                afs_deliver_to_call(call);
+       }
 
-       if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+       if (call->state == AFS_CALL_COMPLETE && call->wait_mode) {
                if (call->wait_mode->async_complete)
                        call->wait_mode->async_complete(call->reply,
                                                        call->error);
@@ -679,122 +595,93 @@ static void afs_process_async_call(struct afs_call *call)
 
                /* we can't just delete the call because the work item may be
                 * queued */
-               call->async_workfn = afs_delete_async_call;
+               call->async_work.func = afs_delete_async_call;
                queue_work(afs_async_calls, &call->async_work);
        }
 
        _leave("");
 }
 
-/*
- * Empty a socket buffer into a flat reply buffer.
- */
-int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
 {
-       size_t len = skb->len;
-
-       if (len > call->reply_max - call->reply_size) {
-               _leave(" = -EBADMSG [%zu > %u]",
-                      len, call->reply_max - call->reply_size);
-               return -EBADMSG;
-       }
+       struct afs_call *call = (struct afs_call *)user_call_ID;
 
-       if (len > 0) {
-               if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
-                                 len) < 0)
-                       BUG();
-               call->reply_size += len;
-       }
-
-       afs_data_consumed(call, skb);
-       if (!last)
-               return -EAGAIN;
-
-       if (call->reply_size != call->reply_max) {
-               _leave(" = -EBADMSG [%u != %u]",
-                      call->reply_size, call->reply_max);
-               return -EBADMSG;
-       }
-       return 0;
+       call->rxcall = rxcall;
 }
 
 /*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
  */
-static void afs_collect_incoming_call(struct work_struct *work)
+static void afs_charge_preallocation(struct work_struct *work)
 {
-       struct rxrpc_call *rxcall;
-       struct afs_call *call = NULL;
-       struct sk_buff *skb;
-
-       while ((skb = skb_dequeue(&afs_incoming_calls))) {
-               _debug("new call");
-
-               /* don't need the notification */
-               afs_free_skb(skb);
+       struct afs_call *call = afs_spare_incoming_call;
 
+       for (;;) {
                if (!call) {
                        call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
-                       if (!call) {
-                               rxrpc_kernel_reject_call(afs_socket);
-                               return;
-                       }
+                       if (!call)
+                               break;
 
-                       call->async_workfn = afs_process_async_call;
-                       INIT_WORK(&call->async_work, afs_async_workfn);
+                       INIT_WORK(&call->async_work, afs_process_async_call);
                        call->wait_mode = &afs_async_incoming_call;
                        call->type = &afs_RXCMxxxx;
                        init_waitqueue_head(&call->waitq);
-                       skb_queue_head_init(&call->rx_queue);
                        call->state = AFS_CALL_AWAIT_OP_ID;
-
-                       _debug("CALL %p{%s} [%d]",
-                              call, call->type->name,
-                              atomic_read(&afs_outstanding_calls));
-                       atomic_inc(&afs_outstanding_calls);
                }
 
-               rxcall = rxrpc_kernel_accept_call(afs_socket,
-                                                 (unsigned long) call);
-               if (!IS_ERR(rxcall)) {
-                       call->rxcall = rxcall;
-                       call = NULL;
-               }
+               if (rxrpc_kernel_charge_accept(afs_socket,
+                                              afs_wake_up_async_call,
+                                              afs_rx_attach,
+                                              (unsigned long)call,
+                                              GFP_KERNEL) < 0)
+                       break;
+               call = NULL;
        }
+       afs_spare_incoming_call = call;
+}
+
+/*
+ * Discard a preallocated call when a socket is shut down.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+                                   unsigned long user_call_ID)
+{
+       struct afs_call *call = (struct afs_call *)user_call_ID;
 
-       if (call)
-               afs_free_call(call);
+       atomic_inc(&afs_outstanding_calls);
+       call->rxcall = NULL;
+       afs_free_call(call);
+}
+
+/*
+ * Notification of an incoming call.
+ */
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+                           unsigned long user_call_ID)
+{
+       atomic_inc(&afs_outstanding_calls);
+       queue_work(afs_wq, &afs_charge_preallocation_work);
 }
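
Taken together, the three callbacks above replace the old collect/accept path with a pull model. A summary sketch, using only names from this patch:

    /* 1. afs_charge_preallocation() parks spare afs_call structs in
     *    rxrpc via rxrpc_kernel_charge_accept().
     * 2. When a call arrives, rxrpc invokes afs_rx_attach() to bind
     *    the rxrpc_call to a preallocated afs_call, then
     *    afs_rx_new_call() to account it and queue a recharge.
     * 3. On shutdown, afs_rx_discard_new_call() frees any afs_call
     *    still parked in the preallocation. */
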
 
 /*
  * Grab the operation ID from an incoming cache manager call.  The
  * data is discarded on error or if we don't yet have sufficient data.
  */
-static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
-                               bool last)
+static int afs_deliver_cm_op_id(struct afs_call *call)
 {
-       size_t len = skb->len;
-       void *oibuf = (void *) &call->operation_ID;
+       int ret;
 
-       _enter("{%u},{%zu},%d", call->offset, len, last);
+       _enter("{%zu}", call->offset);
 
        ASSERTCMP(call->offset, <, 4);
 
        /* the operation ID forms the first four bytes of the request data */
-       len = min_t(size_t, len, 4 - call->offset);
-       if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
-               BUG();
-       if (!pskb_pull(skb, len))
-               BUG();
-       call->offset += len;
-
-       if (call->offset < 4) {
-               afs_data_consumed(call, skb);
-               _leave(" = -EAGAIN");
-               return -EAGAIN;
-       }
+       ret = afs_extract_data(call, &call->operation_ID, 4, true);
+       if (ret < 0)
+               return ret;
 
        call->state = AFS_CALL_AWAIT_REQUEST;
+       call->offset = 0;
 
        /* ask the cache manager to route the call (it'll change the call type
         * if successful) */
@@ -803,7 +690,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
 
        /* pass responsibility for the remainder of this message off to the
         * cache manager op */
-       return call->type->deliver(call, skb, last);
+       return call->type->deliver(call);
 }
 
 /*
@@ -823,14 +710,15 @@ void afs_send_empty_reply(struct afs_call *call)
        msg.msg_flags           = 0;
 
        call->state = AFS_CALL_AWAIT_ACK;
-       switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+       switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
        case 0:
                _leave(" [replied]");
                return;
 
        case -ENOMEM:
                _debug("oom");
-               rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+               rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                       RX_USER_ABORT, ENOMEM, "KOO");
        default:
                afs_end_call(call);
                _leave(" [error]");
@@ -859,7 +747,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
        msg.msg_flags           = 0;
 
        call->state = AFS_CALL_AWAIT_ACK;
-       n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
+       n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
        if (n >= 0) {
                /* Success */
                _leave(" [replied]");
@@ -868,7 +756,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 
        if (n == -ENOMEM) {
                _debug("oom");
-               rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+               rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+                                       RX_USER_ABORT, ENOMEM, "KOO");
        }
        afs_end_call(call);
        _leave(" [error]");
@@ -877,25 +766,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
-                    bool last, void *buf, size_t count)
+int afs_extract_data(struct afs_call *call, void *buf, size_t count,
+                    bool want_more)
 {
-       size_t len = skb->len;
+       int ret;
 
-       _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+       _enter("{%s,%zu},,%zu,%d",
+              call->type->name, call->offset, count, want_more);
 
-       ASSERTCMP(call->offset, <, count);
+       ASSERTCMP(call->offset, <=, count);
 
-       len = min_t(size_t, len, count - call->offset);
-       if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
-           !pskb_pull(skb, len))
-               BUG();
-       call->offset += len;
+       ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+                                    buf, count, &call->offset,
+                                    want_more, &call->abort_code);
+       if (ret == 0 || ret == -EAGAIN)
+               return ret;
 
-       if (call->offset < count) {
-               afs_data_consumed(call, skb);
-               _leave(" = -EAGAIN");
-               return -EAGAIN;
+       if (ret == 1) {
+               switch (call->state) {
+               case AFS_CALL_AWAIT_REPLY:
+                       call->state = AFS_CALL_COMPLETE;
+                       break;
+               case AFS_CALL_AWAIT_REQUEST:
+                       call->state = AFS_CALL_REPLYING;
+                       break;
+               default:
+                       break;
+               }
+               return 0;
        }
-       return 0;
+
+       if (ret == -ECONNABORTED)
+               call->error = call->type->abort_to_error(call->abort_code);
+       else
+               call->error = ret;
+       call->state = AFS_CALL_COMPLETE;
+       return ret;
 }
index f342acf..d4066ab 100644
@@ -178,13 +178,18 @@ server_in_two_cells:
 /*
  * look up a server by its IP address
  */
-struct afs_server *afs_find_server(const struct in_addr *_addr)
+struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
 {
        struct afs_server *server = NULL;
        struct rb_node *p;
-       struct in_addr addr = *_addr;
+       struct in_addr addr = srx->transport.sin.sin_addr;
 
-       _enter("%pI4", &addr.s_addr);
+       _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+
+       if (srx->transport.family != AF_INET) {
+               WARN(true, "AFS does not yet support non-IPv4 addresses\n");
+               return NULL;
+       }
 
        read_lock(&afs_servers_lock);
 
index f94d1ab..94bcd97 100644
@@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code)
 /*
  * deliver reply data to a VL.GetEntryByXXX call
  */
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
-                                          struct sk_buff *skb, bool last)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 {
        struct afs_cache_vlocation *entry;
        __be32 *bp;
        u32 tmp;
        int loop, ret;
 
-       _enter(",,%u", last);
+       _enter("");
 
-       ret = afs_transfer_reply(call, skb, last);
+       ret = afs_transfer_reply(call);
        if (ret < 0)
                return ret;
 
index 5297678..45a8639 100644
@@ -594,8 +594,8 @@ static void afs_vlocation_reaper(struct work_struct *work)
  */
 int __init afs_vlocation_update_init(void)
 {
-       afs_vlocation_update_worker =
-               create_singlethread_workqueue("kafs_vlupdated");
+       afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
+                                                     WQ_MEM_RECLAIM, 0);
        return afs_vlocation_update_worker ? 0 : -ENOMEM;
 }
 
index 7f6aff3..e5495f3 100644
@@ -853,6 +853,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                current->flags |= PF_RANDOMIZE;
 
        setup_new_exec(bprm);
+       install_exec_creds(bprm);
 
        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
@@ -1044,7 +1045,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
                goto out;
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
-       install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
index c3cdde8..08ae993 100644
@@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
                 * thaw_bdev drops it.
                 */
                sb = get_super(bdev);
-               drop_super(sb);
+               if (sb)
+                       drop_super(sb);
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return sb;
        }
@@ -646,7 +647,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type,
 {
        struct dentry *dent;
        dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
-       if (dent)
+       if (!IS_ERR(dent))
                dent->d_sb->s_iflags |= SB_I_CGROUPWB;
        return dent;
 }
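
The bd_mount() fix hinges on the kernel's error-pointer convention: mount_pseudo() reports failure as an ERR_PTR-encoded errno, never NULL, so the old "if (dent)" test passed even on failure and the error pointer could then be dereferenced. A tiny sketch of the idiom; example_check is hypothetical:

    #include <linux/err.h>
    #include <linux/printk.h>

    static void example_check(void *p)
    {
            if (IS_ERR(p))          /* the failure travels in the pointer */
                    pr_err("failed: %ld\n", PTR_ERR(p));
            else if (p)
                    pr_info("valid pointer\n");
    }
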
index 2b88439..455a6b2 100644
@@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode)
 
                        list_del(&ref2->list);
                        kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+                       cond_resched();
                }
 
        }
index 2fe8f89..33fe035 100644
@@ -427,6 +427,7 @@ struct btrfs_space_info {
        struct list_head ro_bgs;
        struct list_head priority_tickets;
        struct list_head tickets;
+       u64 tickets_id;
 
        struct rw_semaphore groups_sem;
        /* for block groups in our same type */
@@ -1028,6 +1029,7 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *qgroup_rescan_workers;
        struct completion qgroup_rescan_completion;
        struct btrfs_work qgroup_rescan_work;
+       bool qgroup_rescan_running;     /* protected by qgroup_rescan_lock */
 
        /* filesystem state */
        unsigned long fs_state;
@@ -1079,6 +1081,8 @@ struct btrfs_fs_info {
        struct list_head pinned_chunks;
 
        int creating_free_space_tree;
+       /* Used to record internally whether fs has been frozen */
+       int fs_frozen;
 };
 
 struct btrfs_subvolume_writers {
@@ -2578,7 +2582,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   u64 root_objectid, u64 owner, u64 offset,
                                   struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
                         u64 min_alloc_size, u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
index d9ddcfc..ac02e04 100644
@@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_qgroup_extent_record *qexisting;
        int count_mod = 1;
        int must_insert_reserved = 0;
 
@@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                qrecord->num_bytes = num_bytes;
                qrecord->old_roots = NULL;
 
-               qexisting = btrfs_qgroup_insert_dirty_extent(fs_info,
-                                                            delayed_refs,
-                                                            qrecord);
-               if (qexisting)
+               if (btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
+                                       delayed_refs, qrecord))
                        kfree(qrecord);
        }
 
index 59febfb..54bc8c7 100644
@@ -559,8 +559,29 @@ static noinline int check_leaf(struct btrfs_root *root,
        u32 nritems = btrfs_header_nritems(leaf);
        int slot;
 
-       if (nritems == 0)
+       if (nritems == 0) {
+               struct btrfs_root *check_root;
+
+               key.objectid = btrfs_header_owner(leaf);
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+
+               check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+               /*
+                * The only reason we also check NULL here is that during
+                * open_ctree() some roots have not yet been set up.
+                */
+               if (!IS_ERR_OR_NULL(check_root)) {
+                       /* if leaf is the root, then it's fine */
+                       if (leaf->start !=
+                           btrfs_root_bytenr(&check_root->root_item)) {
+                               CORRUPT("non-root leaf's nritems is 0",
+                                       leaf, root, 0);
+                               return -EIO;
+                       }
+               }
                return 0;
+       }
 
        /* Check the 0 item */
        if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+       unsigned long nr = btrfs_header_nritems(node);
+
+       if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+               btrfs_crit(root->fs_info,
+                          "corrupt node: block %llu root %llu nritems %lu",
+                          node->start, root->objectid, nr);
+               return -EIO;
+       }
+       return 0;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                ret = -EIO;
        }
 
+       if (found_level > 0 && check_node(root, eb))
+               ret = -EIO;
+
        if (!ret)
                set_extent_buffer_uptodate(eb);
 err:
@@ -1618,8 +1655,8 @@ fail:
        return ret;
 }
 
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-                                              u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id)
 {
        struct btrfs_root *root;
 
@@ -2298,6 +2335,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        fs_info->qgroup_ulist = NULL;
+       fs_info->qgroup_rescan_running = false;
        mutex_init(&fs_info->qgroup_rescan_lock);
 }
 
@@ -2624,6 +2662,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->qgroup_op_seq, 0);
        atomic_set(&fs_info->reada_works_cnt, 0);
        atomic64_set(&fs_info->tree_mod_seq, 0);
+       fs_info->fs_frozen = 0;
        fs_info->sb = sb;
        fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
        fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
+               if (root->reloc_root) {
+                       free_extent_buffer(root->reloc_root->node);
+                       free_extent_buffer(root->reloc_root->commit_root);
+                       btrfs_put_fs_root(root->reloc_root);
+                       root->reloc_root = NULL;
+               }
+       }
 
        if (root->free_ino_pinned)
                __btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@ void close_ctree(struct btrfs_root *root)
        smp_mb();
 
        /* wait for the qgroup rescan worker to stop */
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
 
        /* wait for the uuid_scan task to finish */
        down(&fs_info->uuid_tree_rescan_sem);
index b3207a0..f19a982 100644
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
                                      struct btrfs_key *location);
 int btrfs_init_fs_root(struct btrfs_root *root);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_id);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
                         struct btrfs_root *root);
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
index 61b494e..38c2df8 100644
@@ -60,21 +60,6 @@ enum {
        CHUNK_ALLOC_FORCE = 2,
 };
 
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- *   ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- *   bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
-       RESERVE_FREE = 0,
-       RESERVE_ALLOC = 1,
-       RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root, u64 bytenr,
                              u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@ static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve,
-                                      int delalloc);
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc);
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@ again:
                dcs = BTRFS_DC_SETUP;
        else if (ret == -ENOSPC)
                set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
-       btrfs_free_reserved_data_space(inode, 0, num_pages);
 
 out_put:
        iput(inode);
@@ -4472,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
        }
 }
 
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ *    - return 0 if it doesn't need to allocate a new chunk,
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ */
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 flags, int force)
 {
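
The next hunk applies this contract at a flush_space() call-site; as a caller-side sketch of the documented semantics:

    ret = do_chunk_alloc(trans, extent_root,
                         btrfs_get_alloc_profile(root, 0),
                         CHUNK_ALLOC_NO_FORCE);
    btrfs_end_transaction(trans, root);
    /* 1 ("chunk allocated") is success for flushing purposes, and
     * -ENOSPC is tolerated because a later flush state may still
     * reclaim space */
    if (ret > 0 || ret == -ENOSPC)
            ret = 0;
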
@@ -4882,7 +4876,7 @@ static int flush_space(struct btrfs_root *root,
                                     btrfs_get_alloc_profile(root, 0),
                                     CHUNK_ALLOC_NO_FORCE);
                btrfs_end_transaction(trans, root);
-               if (ret == -ENOSPC)
+               if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
                break;
        case COMMIT_TRANS:
@@ -4907,11 +4901,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
        u64 expected;
        u64 to_reclaim = 0;
 
-       to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-       if (can_overcommit(root, space_info, to_reclaim,
-                          BTRFS_RESERVE_FLUSH_ALL))
-               return 0;
-
        list_for_each_entry(ticket, &space_info->tickets, list)
                to_reclaim += ticket->bytes;
        list_for_each_entry(ticket, &space_info->priority_tickets, list)
@@ -4919,6 +4908,11 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
        if (to_reclaim)
                return to_reclaim;
 
+       to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
+       if (can_overcommit(root, space_info, to_reclaim,
+                          BTRFS_RESERVE_FLUSH_ALL))
+               return 0;
+
        used = space_info->bytes_used + space_info->bytes_reserved +
               space_info->bytes_pinned + space_info->bytes_readonly +
               space_info->bytes_may_use;
@@ -4972,12 +4966,12 @@ static void wake_all_tickets(struct list_head *head)
  */
 static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 {
-       struct reserve_ticket *last_ticket = NULL;
        struct btrfs_fs_info *fs_info;
        struct btrfs_space_info *space_info;
        u64 to_reclaim;
        int flush_state;
        int commit_cycles = 0;
+       u64 last_tickets_id;
 
        fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
        space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
@@ -4990,8 +4984,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                spin_unlock(&space_info->lock);
                return;
        }
-       last_ticket = list_first_entry(&space_info->tickets,
-                                      struct reserve_ticket, list);
+       last_tickets_id = space_info->tickets_id;
        spin_unlock(&space_info->lock);
 
        flush_state = FLUSH_DELAYED_ITEMS_NR;
@@ -5011,10 +5004,10 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                                                              space_info);
                ticket = list_first_entry(&space_info->tickets,
                                          struct reserve_ticket, list);
-               if (last_ticket == ticket) {
+               if (last_tickets_id == space_info->tickets_id) {
                        flush_state++;
                } else {
-                       last_ticket = ticket;
+                       last_tickets_id = space_info->tickets_id;
                        flush_state = FLUSH_DELAYED_ITEMS_NR;
                        if (commit_cycles)
                                commit_cycles--;
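
The tickets_id counter closes an ABA hazard in this progress check: a satisfied ticket can be freed and a new ticket allocated at the same address, so comparing pointers could report "no progress" even though a ticket had been satisfied, needlessly escalating the flush state. The counter is bumped each time a ticket is satisfied (see the space_info->tickets_id++ hunks below), so equality now genuinely means nothing completed:

    if (last_tickets_id == space_info->tickets_id) {
            flush_state++;                          /* truly no progress */
    } else {
            last_tickets_id = space_info->tickets_id;
            flush_state = FLUSH_DELAYED_ITEMS_NR;   /* start over */
    }
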
@@ -5390,6 +5383,7 @@ again:
                        list_del_init(&ticket->list);
                        num_bytes -= ticket->bytes;
                        ticket->bytes = 0;
+                       space_info->tickets_id++;
                        wake_up(&ticket->wait);
                } else {
                        ticket->bytes -= num_bytes;
@@ -5432,6 +5426,7 @@ again:
                        num_bytes -= ticket->bytes;
                        space_info->bytes_may_use += ticket->bytes;
                        ticket->bytes = 0;
+                       space_info->tickets_id++;
                        wake_up(&ticket->wait);
                } else {
                        trace_btrfs_space_reservation(fs_info, "space_info",
@@ -6497,19 +6492,15 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
 }
 
 /**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
+ * btrfs_add_reserved_bytes - update the block_group and space info counters
  * @cache:     The cache we are manipulating
+ * @ram_bytes:  The number of bytes of file content; the same as
+ *              @num_bytes except on the compression path.
  * @num_bytes: The number of bytes in question
- * @reserve:   One of the reservation enums
  * @delalloc:   The blocks are allocated for the delalloc write
  *
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk.  For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+ * This is called by the allocator when it reserves space. Metadata
+ * reservations are handled here so we do the proper
  * ENOSPC accounting.  For data we handle the reservation through clearing the
  * delalloc bits in the io_tree.  We have to do this since we could end up
  * allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6510,63 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
  * make the reservation and return -EAGAIN, otherwise this function always
  * succeeds.
  */
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve, int delalloc)
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                   u64 ram_bytes, u64 num_bytes, int delalloc)
 {
        struct btrfs_space_info *space_info = cache->space_info;
        int ret = 0;
 
        spin_lock(&space_info->lock);
        spin_lock(&cache->lock);
-       if (reserve != RESERVE_FREE) {
-               if (cache->ro) {
-                       ret = -EAGAIN;
-               } else {
-                       cache->reserved += num_bytes;
-                       space_info->bytes_reserved += num_bytes;
-                       if (reserve == RESERVE_ALLOC) {
-                               trace_btrfs_space_reservation(cache->fs_info,
-                                               "space_info", space_info->flags,
-                                               num_bytes, 0);
-                               space_info->bytes_may_use -= num_bytes;
-                       }
-
-                       if (delalloc)
-                               cache->delalloc_bytes += num_bytes;
-               }
+       if (cache->ro) {
+               ret = -EAGAIN;
        } else {
-               if (cache->ro)
-                       space_info->bytes_readonly += num_bytes;
-               cache->reserved -= num_bytes;
-               space_info->bytes_reserved -= num_bytes;
+               cache->reserved += num_bytes;
+               space_info->bytes_reserved += num_bytes;
 
+               trace_btrfs_space_reservation(cache->fs_info,
+                               "space_info", space_info->flags,
+                               ram_bytes, 0);
+               space_info->bytes_may_use -= ram_bytes;
                if (delalloc)
-                       cache->delalloc_bytes -= num_bytes;
+                       cache->delalloc_bytes += num_bytes;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&space_info->lock);
        return ret;
 }
 
+/**
+ * btrfs_free_reserved_bytes - update the block_group and space info counters
+ * @cache:      The cache we are manipulating
+ * @num_bytes:  The number of bytes in question
+ * @delalloc:   The blocks are allocated for the delalloc write
+ *
+ * This is called by somebody who is freeing space that was never actually used
+ * on disk.  For example if you reserve some space for a new leaf in transaction
+ * A and before transaction A commits you free that leaf, you call this to
+ * clear the reservation.
+ */
+
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+                                    u64 num_bytes, int delalloc)
+{
+       struct btrfs_space_info *space_info = cache->space_info;
+       int ret = 0;
+
+       spin_lock(&space_info->lock);
+       spin_lock(&cache->lock);
+       if (cache->ro)
+               space_info->bytes_readonly += num_bytes;
+       cache->reserved -= num_bytes;
+       space_info->bytes_reserved -= num_bytes;
+
+       if (delalloc)
+               cache->delalloc_bytes -= num_bytes;
+       spin_unlock(&cache->lock);
+       spin_unlock(&space_info->lock);
+       return ret;
+}
 void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root)
 {
@@ -7191,7 +7201,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
-               btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+               btrfs_free_reserved_bytes(cache, buf->len, 0);
                btrfs_put_block_group(cache);
                trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
                pin = 0;
@@ -7416,9 +7426,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
  * the free space extent currently.
  */
 static noinline int find_free_extent(struct btrfs_root *orig_root,
-                                    u64 num_bytes, u64 empty_size,
-                                    u64 hint_byte, struct btrfs_key *ins,
-                                    u64 flags, int delalloc)
+                               u64 ram_bytes, u64 num_bytes, u64 empty_size,
+                               u64 hint_byte, struct btrfs_key *ins,
+                               u64 flags, int delalloc)
 {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7440,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
        struct btrfs_space_info *space_info;
        int loop = 0;
        int index = __get_raid_index(flags);
-       int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
-               RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
        bool use_cluster = true;
@@ -7763,8 +7771,8 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = btrfs_update_reserved_bytes(block_group, num_bytes,
-                                                 alloc_type, delalloc);
+               ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+                               num_bytes, delalloc);
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
@@ -7936,7 +7944,7 @@ again:
        up_read(&info->groups_sem);
 }
 
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7956,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
        flags = btrfs_get_alloc_profile(root, is_data);
 again:
        WARN_ON(num_bytes < root->sectorsize);
-       ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
-                              flags, delalloc);
+       ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+                              hint_byte, ins, flags, delalloc);
        if (!ret && !is_data) {
                btrfs_dec_block_group_reservations(root->fs_info,
                                                   ins->objectid);
@@ -7958,6 +7966,7 @@ again:
                        num_bytes = min(num_bytes >> 1, ins->offset);
                        num_bytes = round_down(num_bytes, root->sectorsize);
                        num_bytes = max(num_bytes, min_alloc_size);
+                       ram_bytes = num_bytes;
                        if (num_bytes == min_alloc_size)
                                final_tried = true;
                        goto again;
@@ -7995,7 +8004,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                if (btrfs_test_opt(root->fs_info, DISCARD))
                        ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
-               btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+               btrfs_free_reserved_bytes(cache, len, delalloc);
                trace_btrfs_reserved_extent_free(root, start, len);
        }
 
@@ -8208,6 +8217,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 {
        int ret;
        struct btrfs_block_group_cache *block_group;
+       struct btrfs_space_info *space_info;
 
        /*
         * Mixed block groups will exclude before processing the log so we only
@@ -8223,9 +8233,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        if (!block_group)
                return -EINVAL;
 
-       ret = btrfs_update_reserved_bytes(block_group, ins->offset,
-                                         RESERVE_ALLOC_NO_ACCOUNT, 0);
-       BUG_ON(ret); /* logic error */
+       space_info = block_group->space_info;
+       spin_lock(&space_info->lock);
+       spin_lock(&block_group->lock);
+       space_info->bytes_reserved += ins->offset;
+       block_group->reserved += ins->offset;
+       spin_unlock(&block_group->lock);
+       spin_unlock(&space_info->lock);
+
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
        btrfs_put_block_group(block_group);
@@ -8368,7 +8383,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
        if (IS_ERR(block_rsv))
                return ERR_CAST(block_rsv);
 
-       ret = btrfs_reserve_extent(root, blocksize, blocksize,
+       ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
                                   empty_size, hint, &ins, 0, 0);
        if (ret)
                goto out_unuse;
@@ -8521,35 +8536,6 @@ reada:
        wc->reada_slot = slot;
 }
 
-/*
- * These may not be seen by the usual inc/dec ref code so we have to
- * add them here.
- */
-static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root, u64 bytenr,
-                                    u64 num_bytes)
-{
-       struct btrfs_qgroup_extent_record *qrecord;
-       struct btrfs_delayed_ref_root *delayed_refs;
-
-       qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
-       if (!qrecord)
-               return -ENOMEM;
-
-       qrecord->bytenr = bytenr;
-       qrecord->num_bytes = num_bytes;
-       qrecord->old_roots = NULL;
-
-       delayed_refs = &trans->transaction->delayed_refs;
-       spin_lock(&delayed_refs->lock);
-       if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
-                                            delayed_refs, qrecord))
-               kfree(qrecord);
-       spin_unlock(&delayed_refs->lock);
-
-       return 0;
-}
-
 static int account_leaf_items(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *eb)
@@ -8583,7 +8569,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
 
                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
 
-               ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+               ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+                               bytenr, num_bytes, GFP_NOFS);
                if (ret)
                        return ret;
        }
@@ -8732,8 +8719,9 @@ walk_down:
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 
-                       ret = record_one_subtree_extent(trans, root, child_bytenr,
-                                                       root->nodesize);
+                       ret = btrfs_qgroup_insert_dirty_extent(trans,
+                                       root->fs_info, child_bytenr,
+                                       root->nodesize, GFP_NOFS);
                        if (ret)
                                goto out;
                }
@@ -9906,6 +9894,7 @@ static int find_first_block_group(struct btrfs_root *root,
                        } else {
                                ret = 0;
                        }
+                       free_extent_map(em);
                        goto out;
                }
                path->slots[0]++;
@@ -9942,6 +9931,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
                block_group->iref = 0;
                block_group->inode = NULL;
                spin_unlock(&block_group->lock);
+               ASSERT(block_group->io_ctl.inode == NULL);
                iput(inode);
                last = block_group->key.objectid + block_group->key.offset;
                btrfs_put_block_group(block_group);
@@ -9999,6 +9989,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                        free_excluded_extents(info->extent_root, block_group);
 
                btrfs_remove_free_space_cache(block_group);
+               ASSERT(list_empty(&block_group->dirty_list));
+               ASSERT(list_empty(&block_group->io_list));
+               ASSERT(list_empty(&block_group->bg_list));
+               ASSERT(atomic_read(&block_group->count) == 1);
                btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
index bc2729a..28cd88f 100644 (file)
@@ -20,6 +20,7 @@
 #define EXTENT_DAMAGED         (1U << 14)
 #define EXTENT_NORESERVE       (1U << 15)
 #define EXTENT_QGROUP_RESERVED (1U << 16)
+#define EXTENT_CLEAR_DATA_RESV (1U << 17)
 #define EXTENT_IOBITS          (EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS         (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
index 5842423..fea31a4 100644 (file)
@@ -2070,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        }
        trans->sync = true;
 
-       btrfs_init_log_ctx(&ctx);
+       btrfs_init_log_ctx(&ctx, inode);
 
        ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
        if (ret < 0) {
@@ -2675,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 
        alloc_start = round_down(offset, blocksize);
        alloc_end = round_up(offset + len, blocksize);
+       cur_offset = alloc_start;
 
        /* Make sure we aren't being given some crap mode */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2767,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 
        /* First, check if we exceed the qgroup limit */
        INIT_LIST_HEAD(&reserve_list);
-       cur_offset = alloc_start;
        while (1) {
                em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                      alloc_end - cur_offset, 0);
@@ -2794,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        last_byte - cur_offset);
                        if (ret < 0)
                                break;
+               } else {
+                       /*
+                        * No need to reserve an unwritten extent for this
+                        * range; free the reserved data space now, otherwise
+                        * it would result in a false ENOSPC error.
+                        */
+                       btrfs_free_reserved_data_space(inode, cur_offset,
+                               last_byte - cur_offset);
                }
                free_extent_map(em);
                cur_offset = last_byte;
@@ -2811,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                                        range->start,
                                        range->len, 1 << inode->i_blkbits,
                                        offset + len, &alloc_hint);
+               else
+                       btrfs_free_reserved_data_space(inode, range->start,
+                                                      range->len);
                list_del(&range->list);
                kfree(range);
        }
@@ -2845,18 +2856,11 @@ out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
                             &cached_state, GFP_KERNEL);
 out:
-       /*
-        * As we waited the extent range, the data_rsv_map must be empty
-        * in the range, as written data range will be released from it.
-        * And for prealloacted extent, it will also be released when
-        * its metadata is written.
-        * So this is completely used as cleanup.
-        */
-       btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
        inode_unlock(inode);
        /* Let go of our reservation. */
-       btrfs_free_reserved_data_space(inode, alloc_start,
-                                      alloc_end - alloc_start);
+       if (ret != 0)
+               btrfs_free_reserved_data_space(inode, alloc_start,
+                                      alloc_end - cur_offset);
        return ret;
 }
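Taken together, the btrfs_fallocate() hunks above change the accounting model from "reserve the whole range up front, free the whole range at the end" to "reserve up front, then hand back each sub-range as soon as it turns out to need no allocation, keeping only the unprocessed tail on error". A minimal sketch of that pattern, with every helper name invented for illustration:

/*
 * Reserve-then-trim sketch; every helper here is an invented stand-in
 * for the btrfs calls in the hunks above.
 */
static int fallocate_sketch(struct inode *inode, u64 start, u64 len)
{
        u64 cur = start, end = start + len;
        int ret;

        ret = reserve_data_space(inode, start, len);    /* whole range */
        if (ret)
                return ret;

        while (cur < end) {
                u64 next = next_extent_boundary(inode, cur, end);

                if (range_already_allocated(inode, cur, next))
                        /*
                         * No allocation needed: return these bytes now,
                         * or a later reservation sees a false ENOSPC.
                         */
                        free_reserved_data_space(inode, cur, next - cur);
                else
                        ret = prealloc_range(inode, cur, next - cur);
                if (ret)
                        break;
                cur = next;
        }

        /* on failure, only the not-yet-processed tail is still reserved */
        if (ret)
                free_reserved_data_space(inode, cur, end - cur);
        return ret;
}

The same shape recurs in prealloc_file_extent_cluster() and __btrfs_prealloc_file_range() further down.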
 
index aa6faba..359ee86 100644 (file)
@@ -495,10 +495,9 @@ again:
        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
-               btrfs_delalloc_release_space(inode, 0, prealloc);
+               btrfs_delalloc_release_metadata(inode, prealloc);
                goto out_put;
        }
-       btrfs_free_reserved_data_space(inode, 0, prealloc);
 
        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
 out_put:
index 08dfc57..e6811c4 100644 (file)
@@ -566,6 +566,8 @@ cont:
                                                     PAGE_SET_WRITEBACK |
                                                     page_error_op |
                                                     PAGE_END_WRITEBACK);
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        goto free_pages_out;
                }
        }
@@ -742,7 +744,7 @@ retry:
                lock_extent(io_tree, async_extent->start,
                            async_extent->start + async_extent->ram_size - 1);
 
-               ret = btrfs_reserve_extent(root,
+               ret = btrfs_reserve_extent(root, async_extent->ram_size,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
                                     EXTENT_DEFRAG, PAGE_UNLOCK |
                                     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
                                     PAGE_END_WRITEBACK);
-
+                       btrfs_free_reserved_data_space_noquota(inode, start,
+                                               end - start + 1);
                        *nr_written = *nr_written +
                             (end - start + PAGE_SIZE) / PAGE_SIZE;
                        *page_started = 1;
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
                unsigned long op;
 
                cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                                           &ins, 1, 1);
                if (ret < 0)
@@ -1489,8 +1492,10 @@ out_check:
                extent_clear_unlock_delalloc(inode, cur_offset,
                                             cur_offset + num_bytes - 1,
                                             locked_page, EXTENT_LOCKED |
-                                            EXTENT_DELALLOC, PAGE_UNLOCK |
-                                            PAGE_SET_PRIVATE2);
+                                            EXTENT_DELALLOC |
+                                            EXTENT_CLEAR_DATA_RESV,
+                                            PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+
                if (!nolock && nocow)
                        btrfs_end_write_no_snapshoting(root);
                cur_offset = extent_end;
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                        return;
 
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-                   && do_list && !(state->state & EXTENT_NORESERVE))
+                   && do_list && !(state->state & EXTENT_NORESERVE)
+                   && (*bits & (EXTENT_DO_ACCOUNTING |
+                   EXTENT_CLEAR_DATA_RESV)))
                        btrfs_free_reserved_data_space_noquota(inode,
                                        state->start, len);
 
@@ -7251,7 +7258,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        int ret;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
                                   alloc_hint, &ins, 1, 1);
        if (ret)
                return ERR_PTR(ret);
@@ -7751,6 +7758,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                ret = PTR_ERR(em2);
                                goto unlock_err;
                        }
+                       /*
+                        * For an inode marked NODATACOW or an extent marked
+                        * PREALLOC, we use the existing or preallocated extent,
+                        * so there is no need to adjust btrfs_space_info's
+                        * bytes_may_use.
+                        */
+                       btrfs_free_reserved_data_space_noquota(inode,
+                                       start, len);
                        goto unlock;
                }
        }
@@ -7785,7 +7799,6 @@ unlock:
                        i_size_write(inode, start + len);
 
                adjust_dio_outstanding_extents(inode, dio_data, len);
-               btrfs_free_reserved_data_space(inode, start, len);
                WARN_ON(dio_data->reserve < len);
                dio_data->reserve -= len;
                dio_data->unsubmitted_oe_range_end = start + len;
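The two direct-IO hunks above move the release of the data space reservation out of the common unlock path and into the NOCOW/PREALLOC branch alone. The resulting split, as this patch appears to intend it:

/*
 * Direct-IO reservation split after this patch (summary, assumed):
 *
 * COW write:    btrfs_new_extent_direct() -> btrfs_reserve_extent()
 *               converts bytes_may_use into bytes_reserved, so the
 *               reservation must NOT be freed again in the unlock path.
 *
 * NOCOW/PREALLOC write: no new extent is allocated, so the reservation
 *               is dropped explicitly, without touching qgroup counters:
 */
btrfs_free_reserved_data_space_noquota(inode, start, len);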
@@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
        u64 last_alloc = (u64)-1;
        int ret = 0;
        bool own_trans = true;
+       u64 end = start + num_bytes - 1;
 
        if (trans)
                own_trans = false;
@@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                 * sized chunks.
                 */
                cur_bytes = min(cur_bytes, last_alloc);
-               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
-                                          *alloc_hint, &ins, 1, 0);
+               ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+                               min_size, 0, *alloc_hint, &ins, 1, 0);
                if (ret) {
                        if (own_trans)
                                btrfs_end_transaction(trans, root);
@@ -10414,6 +10428,9 @@ next:
                if (own_trans)
                        btrfs_end_transaction(trans, root);
        }
+       if (cur_offset < end)
+               btrfs_free_reserved_data_space(inode, cur_offset,
+                       end - cur_offset + 1);
        return ret;
 }
 
index 14ed1e9..b2a2da5 100644 (file)
@@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       return btrfs_qgroup_wait_for_completion(root->fs_info);
+       return btrfs_qgroup_wait_for_completion(root->fs_info, true);
 }
 
 static long _btrfs_ioctl_set_received_subvol(struct file *file,
index 93ee1c1..8db2e29 100644 (file)
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                goto out;
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
-       btrfs_qgroup_wait_for_completion(fs_info);
+       btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-                                struct btrfs_delayed_ref_root *delayed_refs,
-                                struct btrfs_qgroup_extent_record *record)
+int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
+                               struct btrfs_delayed_ref_root *delayed_refs,
+                               struct btrfs_qgroup_extent_record *record)
 {
        struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
        struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
                else if (bytenr > entry->bytenr)
                        p = &(*p)->rb_right;
                else
-                       return entry;
+                       return 1;
        }
 
        rb_link_node(&record->node, parent_node, p);
        rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
-       return NULL;
+       return 0;
+}
+
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+               struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+               gfp_t gfp_flag)
+{
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       int ret;
+
+       if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
+               return 0;
+       if (WARN_ON(trans == NULL))
+               return -EINVAL;
+       record = kmalloc(sizeof(*record), gfp_flag);
+       if (!record)
+               return -ENOMEM;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       record->bytenr = bytenr;
+       record->num_bytes = num_bytes;
+       record->old_roots = NULL;
+
+       spin_lock(&delayed_refs->lock);
+       ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
+                                                     record);
+       spin_unlock(&delayed_refs->lock);
+       if (ret > 0)
+               kfree(record);
+       return 0;
 }
 
 #define UPDATE_NEW     0
@@ -2303,6 +2332,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        int err = -ENOMEM;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       fs_info->qgroup_rescan_running = true;
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
        path = btrfs_alloc_path();
        if (!path)
                goto out;
@@ -2369,6 +2402,9 @@ out:
        }
 
 done:
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       fs_info->qgroup_rescan_running = false;
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
        complete_all(&fs_info->qgroup_rescan_completion);
 }
 
@@ -2487,20 +2523,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+                                    bool interruptible)
 {
        int running;
        int ret = 0;
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
        spin_lock(&fs_info->qgroup_lock);
-       running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+       running = fs_info->qgroup_rescan_running;
        spin_unlock(&fs_info->qgroup_lock);
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
-       if (running)
+       if (!running)
+               return 0;
+
+       if (interruptible)
                ret = wait_for_completion_interruptible(
                                        &fs_info->qgroup_rescan_completion);
+       else
+               wait_for_completion(&fs_info->qgroup_rescan_completion);
 
        return ret;
 }
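The new flag is driven by who is waiting, and both call sites appear earlier in this patch: the rescan-wait ioctl passes true so a signal can abort the user's wait, while quota disable passes false because tearing down quota state must not bail out early with -ERESTARTSYS:

/* ioctl path: the user may interrupt the wait */
return btrfs_qgroup_wait_for_completion(root->fs_info, true);

/* btrfs_quota_disable(): the wait must run to completion */
btrfs_qgroup_wait_for_completion(fs_info, false);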
index 710887c..1bc64c8 100644 (file)
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
                        struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+                                    bool interruptible);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
 struct btrfs_delayed_extent_op;
 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-                                struct btrfs_delayed_ref_root *delayed_refs,
-                                struct btrfs_qgroup_extent_record *record);
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at transaction commit time.
+ *
+ * Unlocked version: the caller must hold the delayed ref lock and
+ * allocate the record itself.
+ *
+ * Returns 0 on successful insert.
+ * Returns >0 if the record already exists; the caller may free @record
+ * safely.
+ * Errors are not possible.
+ */
+int btrfs_qgroup_insert_dirty_extent_nolock(
+               struct btrfs_fs_info *fs_info,
+               struct btrfs_delayed_ref_root *delayed_refs,
+               struct btrfs_qgroup_extent_record *record);
+
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at transaction commit time.
+ *
+ * Better encapsulated version that handles locking and record allocation
+ * itself.
+ *
+ * Returns 0 if the operation succeeded.
+ * Returns <0 on error, such as a memory allocation failure or an invalid
+ * parameter (NULL trans).
+ */
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+               struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+               gfp_t gfp_flag);
+
 int
 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
                            struct btrfs_fs_info *fs_info,
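The two declarations above give callers a locked and an unlocked variant. A short sketch of how each is meant to be used, mirroring the call sites elsewhere in this patch:

/* Encapsulated variant: handles allocation and locking itself. */
ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, bytenr, num_bytes,
                                       GFP_NOFS);
if (ret < 0)
        goto out;       /* -ENOMEM or NULL trans; >0 is never returned */

/* _nolock variant, for a caller that already holds the lock and owns
 * the record's memory: */
spin_lock(&delayed_refs->lock);
if (btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, record))
        kfree(record);  /* >0: an entry for this bytenr already existed */
spin_unlock(&delayed_refs->lock);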
index b26a5ae..c0c13dc 100644 (file)
@@ -31,6 +31,7 @@
 #include "async-thread.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "qgroup.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@ int prealloc_file_extent_cluster(struct inode *inode,
        u64 num_bytes;
        int nr = 0;
        int ret = 0;
+       u64 prealloc_start = cluster->start - offset;
+       u64 prealloc_end = cluster->end - offset;
+       u64 cur_offset;
 
        BUG_ON(cluster->start != cluster->boundary[0]);
        inode_lock(inode);
 
-       ret = btrfs_check_data_free_space(inode, cluster->start,
-                                         cluster->end + 1 - cluster->start);
+       ret = btrfs_check_data_free_space(inode, prealloc_start,
+                                         prealloc_end + 1 - prealloc_start);
        if (ret)
                goto out;
 
+       cur_offset = prealloc_start;
        while (nr < cluster->nr) {
                start = cluster->boundary[nr] - offset;
                if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode,
 
                lock_extent(&BTRFS_I(inode)->io_tree, start, end);
                num_bytes = end + 1 - start;
+               if (cur_offset < start)
+                       btrfs_free_reserved_data_space(inode, cur_offset,
+                                       start - cur_offset);
                ret = btrfs_prealloc_file_range(inode, 0, start,
                                                num_bytes, num_bytes,
                                                end + 1, &alloc_hint);
+               cur_offset = end + 1;
                unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
                if (ret)
                        break;
                nr++;
        }
-       btrfs_free_reserved_data_space(inode, cluster->start,
-                                      cluster->end + 1 - cluster->start);
+       if (cur_offset < prealloc_end)
+               btrfs_free_reserved_data_space(inode, cur_offset,
+                                      prealloc_end + 1 - cur_offset);
 out:
        inode_unlock(inode);
        return ret;
@@ -3916,6 +3926,90 @@ int prepare_to_relocate(struct reloc_control *rc)
        return 0;
 }
 
+/*
+ * Qgroup fixer for data chunk relocation.
+ * Data relocation is done in the following steps:
+ * 1) Copy data extents into the data reloc tree
+ * 2) Create a tree reloc tree (a special snapshot) for the related subvolumes
+ * 3) Modify file extents in the tree reloc tree
+ * 4) Merge the tree reloc tree with the original fs tree by swapping tree
+ *    blocks
+ *
+ * The problem is that the data and tree reloc trees are not accounted to
+ * qgroup, and step 4) only informs qgroup of the tree block changes, not of
+ * the file extents inside those tree blocks.
+ *
+ * The good news is that the related data extents are all in the data reloc
+ * tree, so we only need to inform qgroup to track all file extents in the
+ * data reloc tree before committing the transaction.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+                                            struct reloc_control *rc)
+{
+       struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+       struct inode *inode = rc->data_inode;
+       struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret = 0;
+
+       if (!fs_info->quota_enabled)
+               return 0;
+
+       /*
+        * The qgroup fix is only valid for the stage where we update the
+        * data pointers.
+        * In the MOVING_DATA stage we would miss the point at which tree
+        * blocks are swapped, so nothing can be fixed there.
+        */
+       if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+
+       lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+       while (1) {
+               struct btrfs_file_extent_item *fi;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid > btrfs_ino(inode))
+                       break;
+               if (key.type != BTRFS_EXTENT_DATA_KEY)
+                       goto next;
+               fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(path->nodes[0], fi) !=
+                               BTRFS_FILE_EXTENT_REG)
+                       goto next;
+               ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+                       btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+                       btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+                       GFP_NOFS);
+               if (ret < 0)
+                       break;
+next:
+               ret = btrfs_next_item(data_reloc_root, path);
+               if (ret < 0)
+                       break;
+               if (ret > 0) {
+                       ret = 0;
+                       break;
+               }
+       }
+       unlock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
        struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4196,18 @@ restart:
 
        /* get rid of pinned extents */
        trans = btrfs_join_transaction(rc->extent_root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
-       else
-               btrfs_commit_transaction(trans, rc->extent_root);
+               goto out_free;
+       }
+       ret = qgroup_fix_relocated_data_extents(trans, rc);
+       if (ret < 0) {
+               btrfs_abort_transaction(trans, ret);
+               if (!err)
+                       err = ret;
+               goto out_free;
+       }
+       btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
        btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
        btrfs_free_path(path);
@@ -4468,10 +4570,16 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        unset_reloc_control(rc);
 
        trans = btrfs_join_transaction(rc->extent_root);
-       if (IS_ERR(trans))
+       if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
-       else
-               err = btrfs_commit_transaction(trans, rc->extent_root);
+               goto out_free;
+       }
+       err = qgroup_fix_relocated_data_extents(trans, rc);
+       if (err < 0) {
+               btrfs_abort_transaction(trans, err);
+               goto out_free;
+       }
+       err = btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
        kfree(rc);
 out:
index 7fd7e18..0912960 100644 (file)
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                root_key.objectid = key.offset;
                key.offset++;
 
+               /*
+                * The root might have been inserted already, as before we look
+                * for orphan roots, log replay might have happened, which
+                * triggers a transaction commit and qgroup accounting, which
+                * in turn reads and inserts fs roots while doing backref
+                * walking.
+                */
+               root = btrfs_lookup_fs_root(tree_root->fs_info,
+                                           root_key.objectid);
+               if (root) {
+                       WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+                                         &root->state));
+                       if (btrfs_root_refs(&root->root_item) == 0)
+                               btrfs_add_dead_root(root);
+                       continue;
+               }
+
                root = btrfs_read_fs_root(tree_root, &root_key);
                err = PTR_ERR_OR_ZERO(root);
                if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
                err = btrfs_insert_fs_root(root->fs_info, root);
-               /*
-                * The root might have been inserted already, as before we look
-                * for orphan roots, log replay might have happened, which
-                * triggers a transaction commit and qgroup accounting, which
-                * in turn reads and inserts fs roots while doing backref
-                * walking.
-                */
-               if (err == -EEXIST)
-                       err = 0;
                if (err) {
+                       BUG_ON(err == -EEXIST);
                        btrfs_free_fs_root(root);
                        break;
                }
index efe129f..a87675f 100644 (file)
@@ -4268,10 +4268,12 @@ static int process_all_refs(struct send_ctx *sctx,
        }
        btrfs_release_path(path);
 
+       /*
+        * We don't actually care about pending_move as we are simply
+        * re-creating this inode and will be renaming it into place once we
+        * rename the parent directory.
+        */
        ret = process_recorded_refs(sctx, &pending_move);
-       /* Only applicable to an incremental send. */
-       ASSERT(pending_move == 0);
-
 out:
        btrfs_free_path(path);
        return ret;
index 864ce33..4071fe2 100644 (file)
@@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb)
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = btrfs_sb(sb)->tree_root;
 
+       root->fs_info->fs_frozen = 1;
+       /*
+        * We don't need a barrier here; we'll wait for any transaction that
+        * could be in progress on other threads (which may run delayed iputs
+        * that we want to avoid on a frozen filesystem), or do the commit
+        * ourselves.
+        */
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
                /* no transaction, don't bother */
@@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb)
        return btrfs_commit_transaction(trans, root);
 }
 
+static int btrfs_unfreeze(struct super_block *sb)
+{
+       struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+       root->fs_info->fs_frozen = 0;
+       return 0;
+}
+
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = {
        .statfs         = btrfs_statfs,
        .remount_fs     = btrfs_remount,
        .freeze_fs      = btrfs_freeze,
+       .unfreeze_fs    = btrfs_unfreeze,
 };
 
 static const struct file_operations btrfs_ctl_fops = {
index 9cca0a7..95d4191 100644 (file)
@@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
+       /*
+        * If the fs has been frozen, we cannot handle delayed iputs here;
+        * doing so would result in a deadlock on SB_FREEZE_FS.
+        */
        if (current != root->fs_info->transaction_kthread &&
-           current != root->fs_info->cleaner_kthread)
+           current != root->fs_info->cleaner_kthread &&
+           !root->fs_info->fs_frozen)
                btrfs_run_delayed_iputs(root);
 
        return ret;
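This check completes the picture painted by the comments in btrfs_freeze() and here: a delayed iput can evict an inode, and eviction may need to join or start a transaction, which blocks once the superblock has reached SB_FREEZE_FS. A sketch of the cycle being avoided, reconstructed from those comments rather than from an actual trace:

/*
 * Assumed deadlock being avoided (reconstructed, not a real trace):
 *
 *   freeze path                       committing task
 *     freeze_super()                    btrfs_commit_transaction()
 *       reaches SB_FREEZE_FS              btrfs_run_delayed_iputs()
 *       waits for the commit  <---          iput() -> evict()
 *                                             start/join transaction
 *                                               blocks on SB_FREEZE_FS --->
 *
 * Hence: skip delayed iputs while fs_info->fs_frozen is set.
 */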
index fff3f3e..ef9c55b 100644 (file)
@@ -27,6 +27,7 @@
 #include "backref.h"
 #include "hash.h"
 #include "compression.h"
+#include "qgroup.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                ins.type = BTRFS_EXTENT_ITEM_KEY;
                offset = key->offset - btrfs_file_extent_offset(eb, item);
 
+               /*
+                * Manually record the dirty extent: here we did a shallow
+                * copy of the file extent item and skipped the normal backref
+                * update, modifying the extent tree all by ourselves.
+                * So we need to record the dirty extent for qgroup manually,
+                * as the owner of the file extent changed from the log tree
+                * (doesn't affect qgroup) to the fs/file tree (affects qgroup).
+                */
+               ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+                               btrfs_file_extent_disk_bytenr(eb, item),
+                               btrfs_file_extent_disk_num_bytes(eb, item),
+                               GFP_NOFS);
+               if (ret < 0)
+                       goto out;
+
                if (ins.objectid > 0) {
                        u64 csum_start;
                        u64 csum_end;
@@ -2807,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         */
        mutex_unlock(&root->log_mutex);
 
-       btrfs_init_log_ctx(&root_log_ctx);
+       btrfs_init_log_ctx(&root_log_ctx, NULL);
 
        mutex_lock(&log_root_tree->log_mutex);
        atomic_inc(&log_root_tree->log_batch);
@@ -2851,6 +2867,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
        if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
                blk_finish_plug(&plug);
+               list_del_init(&root_log_ctx.list);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = root_log_ctx.log_ret;
                goto out;
@@ -4741,7 +4758,8 @@ again:
                        if (ret < 0) {
                                err = ret;
                                goto out_unlock;
-                       } else if (ret > 0) {
+                       } else if (ret > 0 && ctx &&
+                                  other_ino != btrfs_ino(ctx->inode)) {
                                struct btrfs_key inode_key;
                                struct inode *other_inode;
 
index a9f1b75..ab858e3 100644 (file)
@@ -30,15 +30,18 @@ struct btrfs_log_ctx {
        int log_transid;
        int io_err;
        bool log_new_dentries;
+       struct inode *inode;
        struct list_head list;
 };
 
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+                                     struct inode *inode)
 {
        ctx->log_ret = 0;
        ctx->log_transid = 0;
        ctx->io_err = 0;
        ctx->log_new_dentries = false;
+       ctx->inode = inode;
        INIT_LIST_HEAD(&ctx->list);
 }
 
index 51f1255..035efce 100644 (file)
@@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work)
        struct btrfs_device *device;
 
        device = container_of(work, struct btrfs_device, rcu_work);
-
-       if (device->bdev)
-               blkdev_put(device->bdev, device->mode);
-
        rcu_string_free(device->name);
        kfree(device);
 }
@@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head)
        schedule_work(&device->rcu_work);
 }
 
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+       if (device->bdev && device->writeable) {
+               sync_blockdev(device->bdev);
+               invalidate_bdev(device->bdev);
+       }
+
+       if (device->bdev)
+               blkdev_put(device->bdev, device->mode);
+}
+
 static void btrfs_close_one_device(struct btrfs_device *device)
 {
        struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
        if (device->missing)
                fs_devices->missing_devices--;
 
-       if (device->bdev && device->writeable) {
-               sync_blockdev(device->bdev);
-               invalidate_bdev(device->bdev);
-       }
+       btrfs_close_bdev(device);
 
        new_device = btrfs_alloc_device(NULL, &device->devid,
                                        device->uuid);
@@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
                btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        }
 
+       btrfs_close_bdev(device);
+
        call_rcu(&device->rcu, free_device);
 
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
                /* zero out the old super if it is writable */
                btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
+
+       btrfs_close_bdev(srcdev);
+
        call_rcu(&srcdev->rcu, free_device);
 
        /*
@@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
         * the device_list_mutex lock.
         */
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+       btrfs_close_bdev(tgtdev);
        call_rcu(&tgtdev->rcu, free_device);
 }
 
index c64a0b7..df4b3e6 100644 (file)
@@ -597,7 +597,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
        if (is_hash_order(new_pos)) {
                /* no need to reset last_name for a forward seek when
                 * dentries are sorted in hash order */
-       } else if (fi->frag |= fpos_frag(new_pos)) {
+       } else if (fi->frag != fpos_frag(new_pos)) {
                return true;
        }
        rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
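The one-character ceph fix deserves a note: |= OR-assigns the new fragment bits into fi->frag and then tests the resulting value, so the old code both corrupted the cached fragment and took the branch whenever any bit ended up set. A standalone demonstration of the difference:

#include <stdio.h>

/* Shows why |= in a condition is a bug: it rewrites the left-hand side
 * and evaluates to the new value, not to a comparison. */
int main(void)
{
        unsigned frag = 0x10, new_frag = 0x01;

        if (frag |= new_frag)   /* always true when any bit is set... */
                printf("taken, frag corrupted to 0x%x\n", frag); /* 0x11 */

        frag = 0x10;
        if (frag != new_frag)   /* the intended test; frag stays 0x10 */
                printf("taken, frag still 0x%x\n", frag);
        return 0;
}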
index 0f9961e..ed115ac 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/random.h>
 #include <linux/string.h>
 #include <linux/fscrypto.h>
+#include <linux/mount.h>
 
 static int inode_has_encryption_context(struct inode *inode)
 {
@@ -92,26 +93,42 @@ static int create_encryption_context_from_policy(struct inode *inode,
        return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
 }
 
-int fscrypt_process_policy(struct inode *inode,
+int fscrypt_process_policy(struct file *filp,
                                const struct fscrypt_policy *policy)
 {
+       struct inode *inode = file_inode(filp);
+       int ret;
+
+       if (!inode_owner_or_capable(inode))
+               return -EACCES;
+
        if (policy->version != 0)
                return -EINVAL;
 
+       ret = mnt_want_write_file(filp);
+       if (ret)
+               return ret;
+
        if (!inode_has_encryption_context(inode)) {
-               if (!inode->i_sb->s_cop->empty_dir)
-                       return -EOPNOTSUPP;
-               if (!inode->i_sb->s_cop->empty_dir(inode))
-                       return -ENOTEMPTY;
-               return create_encryption_context_from_policy(inode, policy);
+               if (!S_ISDIR(inode->i_mode))
+                       ret = -EINVAL;
+               else if (!inode->i_sb->s_cop->empty_dir)
+                       ret = -EOPNOTSUPP;
+               else if (!inode->i_sb->s_cop->empty_dir(inode))
+                       ret = -ENOTEMPTY;
+               else
+                       ret = create_encryption_context_from_policy(inode,
+                                                                   policy);
+       } else if (!is_encryption_context_consistent_with_policy(inode,
+                                                                policy)) {
+               printk(KERN_WARNING
+                      "%s: Policy inconsistent with encryption context\n",
+                      __func__);
+               ret = -EINVAL;
        }
 
-       if (is_encryption_context_consistent_with_policy(inode, policy))
-               return 0;
-
-       printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
-              __func__);
-       return -EINVAL;
+       mnt_drop_write_file(filp);
+       return ret;
 }
 EXPORT_SYMBOL(fscrypt_process_policy);
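With fscrypt_process_policy() now taking the struct file, the ownership check, the mnt_want_write_file()/mnt_drop_write_file() bracket, and the directory checks all live in one place, so a filesystem's ioctl handler shrinks to a thin wrapper. The ext4 and f2fs hunks later in this patch both reduce to essentially this sketch (myfs_* is a placeholder name):

/* Hypothetical filesystem wrapper after this patch; myfs_* is invented. */
static long myfs_ioc_set_encryption_policy(struct file *filp,
                                           unsigned long arg)
{
        struct fscrypt_policy policy;

        if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
                           sizeof(policy)))
                return -EFAULT;

        /* permission and write-access checks now happen inside fscrypt */
        return fscrypt_process_policy(filp, &policy);
}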
 
index d116453..79a5941 100644 (file)
@@ -585,7 +585,8 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
  */
 void *devpts_get_priv(struct dentry *dentry)
 {
-       WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
+       if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC)
+               return NULL;
        return dentry->d_fsdata;
 }
 
index eea6491..466f7d6 100644 (file)
@@ -607,20 +607,54 @@ static const struct file_operations format2_fops;
 static const struct file_operations format3_fops;
 static const struct file_operations format4_fops;
 
-static int table_open(struct inode *inode, struct file *file)
+static int table_open1(struct inode *inode, struct file *file)
 {
        struct seq_file *seq;
-       int ret = -1;
+       int ret;
 
-       if (file->f_op == &format1_fops)
-               ret = seq_open(file, &format1_seq_ops);
-       else if (file->f_op == &format2_fops)
-               ret = seq_open(file, &format2_seq_ops);
-       else if (file->f_op == &format3_fops)
-               ret = seq_open(file, &format3_seq_ops);
-       else if (file->f_op == &format4_fops)
-               ret = seq_open(file, &format4_seq_ops);
+       ret = seq_open(file, &format1_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open2(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &format2_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open3(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &format3_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private; /* the dlm_ls */
+       return 0;
+}
+
+static int table_open4(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
 
+       ret = seq_open(file, &format4_seq_ops);
        if (ret)
                return ret;
 
@@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file)
 
 static const struct file_operations format1_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open1,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -639,7 +673,7 @@ static const struct file_operations format1_fops = {
 
 static const struct file_operations format2_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open2,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -647,7 +681,7 @@ static const struct file_operations format2_fops = {
 
 static const struct file_operations format3_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open3,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
@@ -655,7 +689,7 @@ static const struct file_operations format3_fops = {
 
 static const struct file_operations format4_fops = {
        .owner   = THIS_MODULE,
-       .open    = table_open,
+       .open    = table_open4,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
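The four table_open functions above differ only in which seq_operations table they pass to seq_open(). The repetition could in principle be factored through one helper, as in the sketch below; the patch instead keeps four standalone functions, which keeps each fops entry self-describing:

/* Alternative factoring sketch; not the approach taken by this patch. */
static int table_open_common(struct inode *inode, struct file *file,
                             const struct seq_operations *ops)
{
        struct seq_file *seq;
        int ret;

        ret = seq_open(file, ops);
        if (ret)
                return ret;

        seq = file->private_data;
        seq->private = inode->i_private;        /* the dlm_ls */
        return 0;
}

static int table_open1(struct inode *inode, struct file *file)
{
        return table_open_common(inode, file, &format1_seq_ops);
}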
index 3131747..c6ea25a 100644 (file)
@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
                                                      sbi->s_want_extra_isize,
                                                      iloc, handle);
                        if (ret) {
-                               ext4_set_inode_state(inode,
-                                                    EXT4_STATE_NO_EXPAND);
                                if (mnt_count !=
                                        le16_to_cpu(sbi->s_es->s_mnt_count)) {
                                        ext4_warning(inode->i_sb,
index 10686fd..1bb7df5 100644 (file)
@@ -776,7 +776,7 @@ resizefs_out:
                                   (struct fscrypt_policy __user *)arg,
                                   sizeof(policy)))
                        return -EFAULT;
-               return fscrypt_process_policy(inode, &policy);
+               return fscrypt_process_policy(filp, &policy);
 #else
                return -EOPNOTSUPP;
 #endif
index 1c593aa..3ec8708 100644 (file)
@@ -2211,6 +2211,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
 
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors(struct super_block *sb,
+                                 ext4_fsblk_t sb_block,
                                  ext4_group_t *first_not_zeroed)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2241,6 +2242,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        grp = i;
 
                block_bitmap = ext4_block_bitmap(sb, gdp);
+               if (block_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Block bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Block bitmap for group %u not in group "
@@ -2248,6 +2254,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
+               if (inode_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Inode bitmap for group %u not in group "
@@ -2255,6 +2266,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
+               if (inode_table == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode table for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -3757,7 +3773,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
-       if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+       if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
                ret = -EFSCORRUPTED;
                goto failed_mount2;
index 39e9cfb..2eb935c 100644 (file)
@@ -1353,15 +1353,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
        size_t min_offs, free;
        int total_ino;
        void *base, *start, *end;
-       int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
+       int error = 0, tried_min_extra_isize = 0;
        int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+       int isize_diff; /* How much do we need to grow i_extra_isize */
 
        down_write(&EXT4_I(inode)->xattr_sem);
+       /*
+        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+        */
+       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 retry:
-       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
-               up_write(&EXT4_I(inode)->xattr_sem);
-               return 0;
-       }
+       isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+               goto out;
 
        header = IHDR(inode, raw_inode);
        entry = IFIRST(header);
@@ -1382,7 +1386,7 @@ retry:
                goto cleanup;
 
        free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
-       if (free >= new_extra_isize) {
+       if (free >= isize_diff) {
                entry = IFIRST(header);
                ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
                                - new_extra_isize, (void *)raw_inode +
@@ -1390,8 +1394,7 @@ retry:
                                (void *)header, total_ino,
                                inode->i_sb->s_blocksize);
                EXT4_I(inode)->i_extra_isize = new_extra_isize;
-               error = 0;
-               goto cleanup;
+               goto out;
        }
 
        /*
@@ -1414,7 +1417,7 @@ retry:
                end = bh->b_data + bh->b_size;
                min_offs = end - base;
                free = ext4_xattr_free_space(first, &min_offs, base, NULL);
-               if (free < new_extra_isize) {
+               if (free < isize_diff) {
                        if (!tried_min_extra_isize && s_min_extra_isize) {
                                tried_min_extra_isize++;
                                new_extra_isize = s_min_extra_isize;
@@ -1428,7 +1431,7 @@ retry:
                free = inode->i_sb->s_blocksize;
        }
 
-       while (new_extra_isize > 0) {
+       while (isize_diff > 0) {
                size_t offs, size, entry_size;
                struct ext4_xattr_entry *small_entry = NULL;
                struct ext4_xattr_info i = {
@@ -1459,7 +1462,7 @@ retry:
                        EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
                                        EXT4_XATTR_LEN(last->e_name_len);
                        if (total_size <= free && total_size < min_total_size) {
-                               if (total_size < new_extra_isize) {
+                               if (total_size < isize_diff) {
                                        small_entry = last;
                                } else {
                                        entry = last;
@@ -1514,22 +1517,22 @@ retry:
                error = ext4_xattr_ibody_set(handle, inode, &i, is);
                if (error)
                        goto cleanup;
+               total_ino -= entry_size;
 
                entry = IFIRST(header);
-               if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
-                       shift_bytes = new_extra_isize;
+               if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+                       shift_bytes = isize_diff;
                else
-                       shift_bytes = entry_size + size;
+                       shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
                /* Adjust the offsets and shift the remaining entries ahead */
-               ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
-                       shift_bytes, (void *)raw_inode +
-                       EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
-                       (void *)header, total_ino - entry_size,
-                       inode->i_sb->s_blocksize);
+               ext4_xattr_shift_entries(entry, -shift_bytes,
+                       (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+                       EXT4_I(inode)->i_extra_isize + shift_bytes,
+                       (void *)header, total_ino, inode->i_sb->s_blocksize);
 
-               extra_isize += shift_bytes;
-               new_extra_isize -= shift_bytes;
-               EXT4_I(inode)->i_extra_isize = extra_isize;
+               isize_diff -= shift_bytes;
+               EXT4_I(inode)->i_extra_isize += shift_bytes;
+               header = IHDR(inode, raw_inode);
 
                i.name = b_entry_name;
                i.value = buffer;
@@ -1551,6 +1554,8 @@ retry:
                kfree(bs);
        }
        brelse(bh);
+out:
+       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
        up_write(&EXT4_I(inode)->xattr_sem);
        return 0;
 
@@ -1562,6 +1567,10 @@ cleanup:
        kfree(is);
        kfree(bs);
        brelse(bh);
+       /*
+        * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
+        * size expansion failed.
+        */
        up_write(&EXT4_I(inode)->xattr_sem);
        return error;
 }
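The heart of the ext4_expand_extra_isize_ea() rework is arithmetic: the function only needs isize_diff = new_extra_isize - i_extra_isize additional bytes, but the old code kept comparing available space against the full new_extra_isize. A worked example with assumed numbers:

/* Worked example with assumed sizes, not values from a real inode. */
int i_extra_isize   = 32;       /* current extra space in the inode     */
int new_extra_isize = 64;       /* requested target                     */
int isize_diff      = new_extra_isize - i_extra_isize;  /* need 32 more */
int free_in_ibody   = 40;       /* assume 40 bytes free in the inode    */

/*
 * Old test:  free (40) >= new_extra_isize (64) -> false, so xattrs were
 * evicted to the external block even though 40 >= 32 would have done.
 * New test:  free (40) >= isize_diff (32)      -> true, expand in place.
 */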
index 69dd3e6..a92e783 100644 (file)
@@ -24,6 +24,7 @@
 #define EXT4_XATTR_INDEX_SYSTEM                        7
 #define EXT4_XATTR_INDEX_RICHACL               8
 #define EXT4_XATTR_INDEX_ENCRYPTION            9
+#define EXT4_XATTR_INDEX_HURD                  10 /* Reserved for Hurd */
 
 struct ext4_xattr_header {
        __le32  h_magic;        /* magic number for identification */
index d64d2a5..ccb401e 100644 (file)
@@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file,
        trace_f2fs_write_end(inode, pos, len, copied);
 
        set_page_dirty(page);
-       f2fs_put_page(page, 1);
 
        if (pos + copied > i_size_read(inode))
                f2fs_i_size_write(inode, pos + copied);
 
+       f2fs_put_page(page, 1);
        f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        return copied;
 }
index 675fa79..14f5fe2 100644 (file)
@@ -538,7 +538,7 @@ struct f2fs_nm_info {
        /* NAT cache management */
        struct radix_tree_root nat_root;/* root of the nat entry cache */
        struct radix_tree_root nat_set_root;/* root of the nat set cache */
-       struct percpu_rw_semaphore nat_tree_lock;       /* protect nat_tree_lock */
+       struct rw_semaphore nat_tree_lock;      /* protect the nat entry caches */
        struct list_head nat_entries;   /* cached nat entry list (clean) */
        unsigned int nat_cnt;           /* the # of cached nat entries */
        unsigned int dirty_nat_cnt;     /* total num of nat entries in set */
@@ -787,7 +787,7 @@ struct f2fs_sb_info {
        struct f2fs_checkpoint *ckpt;           /* raw checkpoint pointer */
        struct inode *meta_inode;               /* cache meta blocks */
        struct mutex cp_mutex;                  /* checkpoint procedure lock */
-       struct percpu_rw_semaphore cp_rwsem;            /* blocking FS operations */
+       struct rw_semaphore cp_rwsem;           /* blocking FS operations */
        struct rw_semaphore node_write;         /* locking node writes */
        wait_queue_head_t cp_wait;
        unsigned long last_time[MAX_TIME];      /* to store time in jiffies */
@@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-       percpu_down_read(&sbi->cp_rwsem);
+       down_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-       percpu_up_read(&sbi->cp_rwsem);
+       up_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-       percpu_down_write(&sbi->cp_rwsem);
+       down_write(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-       percpu_up_write(&sbi->cp_rwsem);
+       up_write(&sbi->cp_rwsem);
 }
 
 static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
index 0e493f6..28f4f4c 100644 (file)
@@ -1757,21 +1757,14 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
 {
        struct fscrypt_policy policy;
        struct inode *inode = file_inode(filp);
-       int ret;
 
        if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
                                                        sizeof(policy)))
                return -EFAULT;
 
-       ret = mnt_want_write_file(filp);
-       if (ret)
-               return ret;
-
        f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
-       ret = fscrypt_process_policy(inode, &policy);
 
-       mnt_drop_write_file(filp);
-       return ret;
+       return fscrypt_process_policy(filp, &policy);
 }
 
 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
@@ -2086,15 +2079,19 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
        if (unlikely(f2fs_readonly(src->i_sb)))
                return -EROFS;
 
-       if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
-               return -EISDIR;
+       if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
+               return -EINVAL;
 
        if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
                return -EOPNOTSUPP;
 
        inode_lock(src);
-       if (src != dst)
-               inode_lock(dst);
+       if (src != dst) {
+               if (!inode_trylock(dst)) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
 
        ret = -EINVAL;
        if (pos_in + len > src->i_size || pos_in + len < pos_in)
@@ -2152,6 +2149,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
 out_unlock:
        if (src != dst)
                inode_unlock(dst);
+out:
        inode_unlock(src);
        return ret;
 }
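
The inode_trylock() above closes an ABBA deadlock window: two concurrent moves with src and dst swapped could each hold one inode lock while waiting forever for the other. Failing with -EBUSY breaks the cycle and lets userspace retry. The idiom reduced to a sketch (the demo helper name is hypothetical):

    #include <linux/errno.h>
    #include <linux/fs.h>

    static int demo_lock_inode_pair(struct inode *src, struct inode *dst)
    {
            inode_lock(src);
            if (src != dst && !inode_trylock(dst)) {
                    inode_unlock(src);
                    return -EBUSY;          /* caller backs off and may retry */
            }
            return 0;                       /* both inodes are now locked */
    }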
index b2fa4b6..f75d197 100644
@@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool need = false;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                if (!get_nat_flag(e, IS_CHECKPOINTED) &&
                                !get_nat_flag(e, HAS_FSYNCED_INODE))
                        need = true;
        }
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return need;
 }
 
@@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool is_cp = true;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e && !get_nat_flag(e, IS_CHECKPOINTED))
                is_cp = false;
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return is_cp;
 }
 
@@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
        struct nat_entry *e;
        bool need_update = true;
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ino);
        if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
                        (get_nat_flag(e, IS_CHECKPOINTED) ||
                         get_nat_flag(e, HAS_FSYNCED_INODE)))
                need_update = false;
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        return need_update;
 }
 
@@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *e;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ni->nid);
        if (!e) {
                e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                        set_nat_flag(e, HAS_FSYNCED_INODE, true);
                set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 }
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        int nr = nr_shrink;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       if (!down_write_trylock(&nm_i->nat_tree_lock))
+               return 0;
 
        while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
                struct nat_entry *ne;
@@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
                __del_from_nat_cache(nm_i, ne);
                nr_shrink--;
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
        return nr - nr_shrink;
 }
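
try_to_free_nats() runs from the f2fs shrinker, and a shrinker that sleeps on a contended lock can stall direct reclaim; with down_write_trylock() it simply reports that nothing could be freed this time. An illustrative scan callback built on the same rule (all demo_* names are assumptions):

    #include <linux/rwsem.h>
    #include <linux/shrinker.h>

    static DECLARE_RWSEM(demo_cache_lock);

    static unsigned long demo_shrink_scan(struct shrinker *shrink,
                                          struct shrink_control *sc)
    {
            unsigned long freed = 0;

            if (!down_write_trylock(&demo_cache_lock))
                    return SHRINK_STOP;     /* busy: let reclaim move on */

            /* drop up to sc->nr_to_scan cached entries, counting freed */

            up_write(&demo_cache_lock);
            return freed;
    }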
 
@@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        ni->nid = nid;
 
        /* Check nat cache */
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                ni->ino = nat_get_ino(e);
                ni->blk_addr = nat_get_blkaddr(e);
                ni->version = nat_get_version(e);
-               percpu_up_read(&nm_i->nat_tree_lock);
+               up_read(&nm_i->nat_tree_lock);
                return;
        }
 
@@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        node_info_from_raw_nat(ni, &ne);
        f2fs_put_page(page, 1);
 cache:
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
        /* cache nat entry */
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        cache_nat_entry(sbi, nid, &ne);
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                        META_NAT, true);
 
-       percpu_down_read(&nm_i->nat_tree_lock);
+       down_read(&nm_i->nat_tree_lock);
 
        while (1) {
                struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
                        remove_free_nid(nm_i, nid);
        }
        up_read(&curseg->journal_rwsem);
-       percpu_up_read(&nm_i->nat_tree_lock);
+       up_read(&nm_i->nat_tree_lock);
 
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
                                        nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        if (!nm_i->dirty_nat_cnt)
                return;
 
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
 
        /*
         * if there is not enough space in the journal to store dirty nat
@@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        list_for_each_entry_safe(set, tmp, &sets, set_list)
                __flush_nat_entry_set(sbi, set);
 
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 
        f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
@@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
        mutex_init(&nm_i->build_lock);
        spin_lock_init(&nm_i->free_nid_list_lock);
-       if (percpu_init_rwsem(&nm_i->nat_tree_lock))
-               return -ENOMEM;
+       init_rwsem(&nm_i->nat_tree_lock);
 
        nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
        nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
        spin_unlock(&nm_i->free_nid_list_lock);
 
        /* destroy nat cache */
-       percpu_down_write(&nm_i->nat_tree_lock);
+       down_write(&nm_i->nat_tree_lock);
        while ((found = __gang_lookup_nat_cache(nm_i,
                                        nid, NATVEC_SIZE, natvec))) {
                unsigned idx;
@@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
                        kmem_cache_free(nat_entry_set_slab, setvec[idx]);
                }
        }
-       percpu_up_write(&nm_i->nat_tree_lock);
+       up_write(&nm_i->nat_tree_lock);
 
-       percpu_free_rwsem(&nm_i->nat_tree_lock);
        kfree(nm_i->nat_bitmap);
        sbi->nm_info = NULL;
        kfree(nm_i);
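
One detail of get_node_info() above deserves a standalone look: kernel rwsems cannot be upgraded in place, so the code releases the read lock, performs the disk lookup unlocked, then takes the write lock to cache the result. Because the lock is dropped in between, the insert must tolerate a racing thread having cached the entry first. A stripped-down sketch with hypothetical types and stubbed helpers:

    #include <linux/rwsem.h>
    #include <linux/types.h>

    struct demo_cache {
            struct rw_semaphore lock;
            /* ... cached entries, e.g. a radix tree ... */
    };

    static bool demo_lookup(struct demo_cache *c, int key) { return false; }
    static void demo_insert(struct demo_cache *c, int key) { /* idempotent */ }

    static void demo_get(struct demo_cache *c, int key)
    {
            down_read(&c->lock);
            if (demo_lookup(c, key)) {
                    up_read(&c->lock);
                    return;                 /* fast path: cache hit */
            }
            up_read(&c->lock);

            /* slow path: read the entry from disk with no lock held */

            down_write(&c->lock);
            demo_insert(c, key);            /* must cope with a racing insert */
            up_write(&c->lock);
    }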
index 1b86d3f..7f863a6 100644
@@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
                percpu_counter_destroy(&sbi->nr_pages[i]);
        percpu_counter_destroy(&sbi->alloc_valid_block_count);
        percpu_counter_destroy(&sbi->total_valid_inode_count);
-
-       percpu_free_rwsem(&sbi->cp_rwsem);
 }
 
 static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 {
        int i, err;
 
-       if (percpu_init_rwsem(&sbi->cp_rwsem))
-               return -ENOMEM;
-
        for (i = 0; i < NR_COUNT_TYPE; i++) {
                err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
                if (err)
@@ -1686,6 +1681,7 @@ try_onemore:
                sbi->write_io[i].bio = NULL;
        }
 
+       init_rwsem(&sbi->cp_rwsem);
        init_waitqueue_head(&sbi->cp_wait);
        init_sb_info(sbi);
 
index f394aff..3988b43 100644
@@ -530,13 +530,13 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
        req->out.args[0].size = count;
 }
 
-static void fuse_release_user_pages(struct fuse_req *req, int write)
+static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
 {
        unsigned i;
 
        for (i = 0; i < req->num_pages; i++) {
                struct page *page = req->pages[i];
-               if (write)
+               if (should_dirty)
                        set_page_dirty_lock(page);
                put_page(page);
        }
@@ -1320,6 +1320,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                       loff_t *ppos, int flags)
 {
        int write = flags & FUSE_DIO_WRITE;
+       bool should_dirty = !write && iter_is_iovec(iter);
        int cuse = flags & FUSE_DIO_CUSE;
        struct file *file = io->file;
        struct inode *inode = file->f_mapping->host;
@@ -1363,7 +1364,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                        nres = fuse_send_read(req, io, pos, nbytes, owner);
 
                if (!io->async)
-                       fuse_release_user_pages(req, !write);
+                       fuse_release_user_pages(req, should_dirty);
                if (req->out.h.error) {
                        err = req->out.h.error;
                        break;
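
The rename from write to should_dirty encodes a real rule: after a direct read, only pages pinned from a user iovec need set_page_dirty_lock() so the VM knows their contents changed; pages behind an ITER_BVEC/ITER_KVEC (splice, CUSE and similar paths) must not be dirtied that way. The release helper in isolation (a sketch, not the fuse code itself):

    #include <linux/mm.h>

    static void demo_release_pinned(struct page **pages, unsigned int nr,
                                    bool should_dirty)
    {
            unsigned int i;

            for (i = 0; i < nr; i++) {
                    if (should_dirty)
                            set_page_dirty_lock(pages[i]);
                    put_page(pages[i]);
            }
    }

    /* callers compute: should_dirty = !write && iter_is_iovec(iter); */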
index 48141b8..706270f 100644
@@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
         * Now the data has been copied, commit the range we've copied.  This
         * should not fail unless the filesystem has had a fatal error.
         */
-       ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
-                       flags, &iomap);
+       if (ops->iomap_end) {
+               ret = ops->iomap_end(inode, pos, length,
+                                    written > 0 ? written : 0,
+                                    flags, &iomap);
+       }
 
        return written ? written : ret;
 }
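
Making ->iomap_end optional spares simple filesystems an empty stub; the core now calls it only when the pointer is set. The shape of such an optional-callback table, modelled with assumed demo types rather than the real struct iomap_ops:

    #include <linux/types.h>

    struct demo_ops {
            int (*begin)(loff_t pos, loff_t len);
            int (*end)(loff_t pos, loff_t len, ssize_t written); /* may be NULL */
    };

    static ssize_t demo_apply(const struct demo_ops *ops, loff_t pos,
                              loff_t len, ssize_t written)
    {
            int ret = 0;

            if (ops->end)
                    ret = ops->end(pos, len, written > 0 ? written : 0);

            return written ? written : ret;
    }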
@@ -194,12 +197,9 @@ again:
                if (mapping_writably_mapped(inode->i_mapping))
                        flush_dcache_page(page);
 
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
 
                flush_dcache_page(page);
-               mark_page_accessed(page);
 
                status = iomap_write_end(inode, pos, bytes, copied, page);
                if (unlikely(status < 0))
@@ -428,9 +428,12 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
                break;
        }
 
+       if (iomap->flags & IOMAP_F_MERGED)
+               flags |= FIEMAP_EXTENT_MERGED;
+
        return fiemap_fill_next_extent(fi, iomap->offset,
                        iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9 : 0,
-                       iomap->length, flags | FIEMAP_EXTENT_MERGED);
+                       iomap->length, flags);
 
 }
 
@@ -470,13 +473,18 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
        if (ret)
                return ret;
 
-       ret = filemap_write_and_wait(inode->i_mapping);
-       if (ret)
-               return ret;
+       if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
+               ret = filemap_write_and_wait(inode->i_mapping);
+               if (ret)
+                       return ret;
+       }
 
        while (len > 0) {
                ret = iomap_apply(inode, start, len, 0, ops, &ctx,
                                iomap_fiemap_actor);
+               /* inode with no (attribute) mapping will give ENOENT */
+               if (ret == -ENOENT)
+                       break;
                if (ret < 0)
                        return ret;
                if (ret == 0)
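
Two behavioural fixes land in iomap_fiemap() here: the writeback flush now honours FIEMAP_FLAG_SYNC instead of running unconditionally, and an -ENOENT from the actor (for example an inode with no attribute mapping) ends the walk quietly instead of failing the ioctl. The resulting control flow, with demo_next_extent() as a stubbed stand-in for the iomap_apply() call:

    #include <linux/fiemap.h>
    #include <linux/fs.h>

    /* stand-in for iomap_apply(); returns bytes covered, 0, or -errno */
    static loff_t demo_next_extent(struct inode *inode, loff_t start,
                                   loff_t len, struct fiemap_extent_info *fi)
    {
            return -ENOENT;                 /* stub: nothing mapped */
    }

    static int demo_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
                           loff_t start, loff_t len)
    {
            loff_t ret;

            if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
                    ret = filemap_write_and_wait(inode->i_mapping);
                    if (ret)
                            return ret;
            }

            while (len > 0) {
                    ret = demo_next_extent(inode, start, len, fi);
                    if (ret == -ENOENT)     /* no mapping past here: stop quietly */
                            break;
                    if (ret <= 0)
                            return ret;     /* error, or the actor asked to stop */
                    start += ret;
                    len -= ret;
            }
            return 0;
    }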
index e157400..2bcb86e 100644
@@ -840,21 +840,35 @@ repeat:
        mutex_lock(&kernfs_mutex);
 
        list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
+               struct kernfs_node *parent;
                struct inode *inode;
-               struct dentry *dentry;
 
+               /*
+                * We want fsnotify_modify() on @kn but, as the
+                * modifications aren't originating from userland, we
+                * don't have the matching @file available.  Look up
+                * the inodes and generate the events manually.
+                */
                inode = ilookup(info->sb, kn->ino);
                if (!inode)
                        continue;
 
-               dentry = d_find_any_alias(inode);
-               if (dentry) {
-                       fsnotify_parent(NULL, dentry, FS_MODIFY);
-                       fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
-                                NULL, 0);
-                       dput(dentry);
+               parent = kernfs_get_parent(kn);
+               if (parent) {
+                       struct inode *p_inode;
+
+                       p_inode = ilookup(info->sb, parent->ino);
+                       if (p_inode) {
+                               fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
+                                        inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
+                               iput(p_inode);
+                       }
+
+                       kernfs_put(parent);
                }
 
+               fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
+                        kn->name, 0);
                iput(inode);
        }
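
Since these writes don't originate from userland there is no struct file to feed fsnotify_modify(), so the code synthesises the two events by hand: one on the parent with FS_EVENT_ON_CHILD (what inotify watchers of the directory observe) and one on the inode itself. The skeleton of that pairing, with illustrative parameters:

    #include <linux/fs.h>
    #include <linux/fsnotify.h>

    static void demo_notify_modify(struct super_block *sb, struct inode *inode,
                                   unsigned long parent_ino, const char *name)
    {
            struct inode *p_inode = ilookup(sb, parent_ino);

            if (p_inode) {
                    fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
                             inode, FSNOTIFY_EVENT_INODE, name, 0);
                    iput(p_inode);
            }
            fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, name, 0);
    }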
 
index f55a4e7..2178476 100644
@@ -346,7 +346,7 @@ static void bl_write_cleanup(struct work_struct *work)
                        PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
 
                ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
-                                       (end - start) >> SECTOR_SHIFT);
+                                       (end - start) >> SECTOR_SHIFT, end);
        }
 
        pnfs_ld_write_done(hdr);
index 18e6fd0..efc007f 100644
@@ -141,6 +141,7 @@ struct pnfs_block_layout {
        struct rb_root          bl_ext_ro;
        spinlock_t              bl_ext_lock;   /* Protects list manipulation */
        bool                    bl_scsi_layout;
+       u64                     bl_lwb;
 };
 
 static inline struct pnfs_block_layout *
@@ -182,7 +183,7 @@ int ext_tree_insert(struct pnfs_block_layout *bl,
 int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
                sector_t end);
 int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-               sector_t len);
+               sector_t len, u64 lwb);
 bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
                struct pnfs_block_extent *ret, bool rw);
 int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
index 992bcb1..c85fbfd 100644
@@ -402,7 +402,7 @@ ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
 
 int
 ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-               sector_t len)
+               sector_t len, u64 lwb)
 {
        struct rb_root *root = &bl->bl_ext_rw;
        sector_t end = start + len;
@@ -471,6 +471,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
                }
        }
 out:
+       if (bl->bl_lwb < lwb)
+               bl->bl_lwb = lwb;
        spin_unlock(&bl->bl_ext_lock);
 
        __ext_put_deviceids(&tmp);
@@ -518,7 +520,7 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
 }
 
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
-               size_t buffer_size, size_t *count)
+               size_t buffer_size, size_t *count, __u64 *lastbyte)
 {
        struct pnfs_block_extent *be;
        int ret = 0;
@@ -542,6 +544,8 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
                        p = encode_block_extent(be, p);
                be->be_tag = EXTENT_COMMITTING;
        }
+       *lastbyte = bl->bl_lwb - 1;
+       bl->bl_lwb = 0;
        spin_unlock(&bl->bl_ext_lock);
 
        return ret;
@@ -564,7 +568,7 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
        arg->layoutupdate_pages = &arg->layoutupdate_page;
 
 retry:
-       ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+       ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
        if (unlikely(ret)) {
                ext_tree_free_commitdata(arg, buffer_size);
 
index a7f2e6e..52a2831 100644
@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
 err_socks:
        svc_rpcb_cleanup(serv, net);
 err_bind:
+       nn->cb_users[minorversion]--;
        dprintk("NFS: Couldn't create callback socket: err = %d; "
                        "net = %p\n", ret, net);
        return ret;
index c92a75e..f953ef6 100644
@@ -454,11 +454,8 @@ static bool referring_call_exists(struct nfs_client *clp,
                                ((u32 *)&rclist->rcl_sessionid.data)[3],
                                ref->rc_sequenceid, ref->rc_slotid);
 
-                       spin_lock(&tbl->slot_tbl_lock);
-                       status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
-                                 tbl->slots[ref->rc_slotid].seq_nr ==
-                                       ref->rc_sequenceid);
-                       spin_unlock(&tbl->slot_tbl_lock);
+                       status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
+                                       ref->rc_sequenceid, HZ >> 1) < 0;
                        if (status)
                                goto out;
                }
@@ -487,7 +484,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
                goto out;
 
        tbl = &clp->cl_session->bc_slot_table;
-       slot = tbl->slots + args->csa_slotid;
 
        /* Set up res before grabbing the spinlock */
        memcpy(&res->csr_sessionid, &args->csa_sessionid,
index 003ebce..1e10678 100644
@@ -426,7 +426,7 @@ EXPORT_SYMBOL_GPL(nfs_mark_client_ready);
  * Initialise the timeout values for a connection
  */
 void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
-                                   unsigned int timeo, unsigned int retrans)
+                                   int timeo, int retrans)
 {
        to->to_initval = timeo * HZ / 10;
        to->to_retries = retrans;
@@ -434,9 +434,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
        switch (proto) {
        case XPRT_TRANSPORT_TCP:
        case XPRT_TRANSPORT_RDMA:
-               if (to->to_retries == 0)
+               if (retrans == NFS_UNSPEC_RETRANS)
                        to->to_retries = NFS_DEF_TCP_RETRANS;
-               if (to->to_initval == 0)
+               if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
                        to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
                if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
                        to->to_initval = NFS_MAX_TCP_TIMEOUT;
@@ -449,9 +449,9 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
                to->to_exponential = 0;
                break;
        case XPRT_TRANSPORT_UDP:
-               if (to->to_retries == 0)
+               if (retrans == NFS_UNSPEC_RETRANS)
                        to->to_retries = NFS_DEF_UDP_RETRANS;
-               if (!to->to_initval)
+               if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
                        to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;
                if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
                        to->to_initval = NFS_MAX_UDP_TIMEOUT;
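
The signed parameters plus the UINT_MAX sentinels exist so that an explicit retrans=0 (now a legal mount option) can be told apart from "option not given at all". The scheme in miniature (the constants mirror NFS_UNSPEC_RETRANS; the helper itself is hypothetical):

    #include <linux/kernel.h>

    #define DEMO_UNSPEC_RETRANS     UINT_MAX  /* never specified by the user */
    #define DEMO_DEF_RETRANS        2

    static unsigned int demo_effective_retrans(unsigned int retrans)
    {
            if (retrans == DEMO_UNSPEC_RETRANS)
                    return DEMO_DEF_RETRANS;  /* fall back to the default */
            return retrans;                   /* honour it, even an explicit 0 */
    }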
index e6206ea..51b5136 100644
@@ -37,6 +37,7 @@ ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
        if (ffl) {
                INIT_LIST_HEAD(&ffl->error_list);
                INIT_LIST_HEAD(&ffl->mirrors);
+               ffl->last_report_time = ktime_get();
                return &ffl->generic_hdr;
        } else
                return NULL;
@@ -640,19 +641,18 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
 {
        static const ktime_t notime = {0};
        s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
+       struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
 
        nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
        if (ktime_equal(mirror->start_time, notime))
                mirror->start_time = now;
-       if (ktime_equal(mirror->last_report_time, notime))
-               mirror->last_report_time = now;
        if (mirror->report_interval != 0)
                report_interval = (s64)mirror->report_interval * 1000LL;
        else if (layoutstats_timer != 0)
                report_interval = (s64)layoutstats_timer * 1000LL;
-       if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+       if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
                        report_interval) {
-               mirror->last_report_time = now;
+               ffl->last_report_time = now;
                return true;
        }
 
@@ -806,11 +806,14 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
 {
        struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
        struct nfs4_pnfs_ds *ds;
+       bool fail_return = false;
        int idx;
 
        /* mirrors are sorted by efficiency */
        for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
-               ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+               if (idx+1 == fls->mirror_array_cnt)
+                       fail_return = true;
+               ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
                if (ds) {
                        *best_idx = idx;
                        return ds;
@@ -859,6 +862,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
        struct nfs4_pnfs_ds *ds;
        int ds_idx;
 
+retry:
        /* Use full layout for now */
        if (!pgio->pg_lseg)
                ff_layout_pg_get_read(pgio, req, false);
@@ -871,10 +875,13 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 
        ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
        if (!ds) {
-               if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
-                       goto out_pnfs;
-               else
+               if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
                        goto out_mds;
+               pnfs_put_lseg(pgio->pg_lseg);
+               pgio->pg_lseg = NULL;
+               /* Sleep for 1 second before retrying */
+               ssleep(1);
+               goto retry;
        }
 
        mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
@@ -890,12 +897,6 @@ out_mds:
        pnfs_put_lseg(pgio->pg_lseg);
        pgio->pg_lseg = NULL;
        nfs_pageio_reset_read_mds(pgio);
-       return;
-
-out_pnfs:
-       pnfs_set_lo_fail(pgio->pg_lseg);
-       pnfs_put_lseg(pgio->pg_lseg);
-       pgio->pg_lseg = NULL;
 }
 
 static void
@@ -909,6 +910,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
        int i;
        int status;
 
+retry:
        if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
                                                   req->wb_context,
@@ -940,10 +942,13 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
        for (i = 0; i < pgio->pg_mirror_count; i++) {
                ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
                if (!ds) {
-                       if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
-                               goto out_pnfs;
-                       else
+                       if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
                                goto out_mds;
+                       pnfs_put_lseg(pgio->pg_lseg);
+                       pgio->pg_lseg = NULL;
+                       /* Sleep for 1 second before retrying */
+                       ssleep(1);
+                       goto retry;
                }
                pgm = &pgio->pg_mirrors[i];
                mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -956,12 +961,6 @@ out_mds:
        pnfs_put_lseg(pgio->pg_lseg);
        pgio->pg_lseg = NULL;
        nfs_pageio_reset_write_mds(pgio);
-       return;
-
-out_pnfs:
-       pnfs_set_lo_fail(pgio->pg_lseg);
-       pnfs_put_lseg(pgio->pg_lseg);
-       pgio->pg_lseg = NULL;
 }
 
 static unsigned int
index 1bcdb15..3ee0c9f 100644
@@ -84,7 +84,6 @@ struct nfs4_ff_layout_mirror {
        struct nfs4_ff_layoutstat       read_stat;
        struct nfs4_ff_layoutstat       write_stat;
        ktime_t                         start_time;
-       ktime_t                         last_report_time;
        u32                             report_interval;
 };
 
@@ -101,6 +100,7 @@ struct nfs4_flexfile_layout {
        struct pnfs_ds_commit_info commit_info;
        struct list_head        mirrors;
        struct list_head        error_list; /* nfs4_ff_layout_ds_err */
+       ktime_t                 last_report_time; /* Layoutstat report times */
 };
 
 static inline struct nfs4_flexfile_layout *
index 0aa36be..f7a3f6b 100644
@@ -17,8 +17,8 @@
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS_LD
 
-static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
-static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_retrans;
 
 void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
 {
@@ -379,7 +379,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 
        devid = &mirror->mirror_ds->id_node;
        if (ff_layout_test_devid_unavailable(devid))
-               goto out;
+               goto out_fail;
 
        ds = mirror->mirror_ds->ds;
        /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -405,15 +405,16 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                        mirror->mirror_ds->ds_versions[0].rsize = max_payload;
                if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
                        mirror->mirror_ds->ds_versions[0].wsize = max_payload;
-       } else {
-               ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
-                                        mirror, lseg->pls_range.offset,
-                                        lseg->pls_range.length, NFS4ERR_NXIO,
-                                        OP_ILLEGAL, GFP_NOIO);
-               if (fail_return || !ff_layout_has_available_ds(lseg))
-                       pnfs_error_mark_layout_for_return(ino, lseg);
-               ds = NULL;
+               goto out;
        }
+       ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
+                                mirror, lseg->pls_range.offset,
+                                lseg->pls_range.length, NFS4ERR_NXIO,
+                                OP_ILLEGAL, GFP_NOIO);
+out_fail:
+       if (fail_return || !ff_layout_has_available_ds(lseg))
+               pnfs_error_mark_layout_for_return(ino, lseg);
+       ds = NULL;
 out:
        return ds;
 }
index 7ce5e02..74935a1 100644
@@ -58,6 +58,9 @@ struct nfs_clone_mount {
  */
 #define NFS_UNSPEC_PORT                (-1)
 
+#define NFS_UNSPEC_RETRANS     (UINT_MAX)
+#define NFS_UNSPEC_TIMEO       (UINT_MAX)
+
 /*
  * Maximum number of pages that readdir can use for creating
  * a vmapped array of pages.
@@ -156,7 +159,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
 int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
 void nfs_server_insert_lists(struct nfs_server *);
 void nfs_server_remove_lists(struct nfs_server *);
-void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
+void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans);
 int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
                rpc_authflavor_t);
 struct nfs_server *nfs_alloc_server(void);
index 6f47527..64b43b4 100644
@@ -318,10 +318,22 @@ static void
 nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs42_layoutstat_data *data = calldata;
-       struct nfs_server *server = NFS_SERVER(data->args.inode);
+       struct inode *inode = data->inode;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layout_hdr *lo;
 
+       spin_lock(&inode->i_lock);
+       lo = NFS_I(inode)->layout;
+       if (!pnfs_layout_is_valid(lo)) {
+               spin_unlock(&inode->i_lock);
+               rpc_exit(task, 0);
+               return;
+       }
+       nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid);
+       spin_unlock(&inode->i_lock);
        nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
                             &data->res.seq_res, task);
+
 }
 
 static void
@@ -341,11 +353,11 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
        case -NFS4ERR_ADMIN_REVOKED:
        case -NFS4ERR_DELEG_REVOKED:
        case -NFS4ERR_STALE_STATEID:
-       case -NFS4ERR_OLD_STATEID:
        case -NFS4ERR_BAD_STATEID:
                spin_lock(&inode->i_lock);
                lo = NFS_I(inode)->layout;
-               if (lo && nfs4_stateid_match(&data->args.stateid,
+               if (pnfs_layout_is_valid(lo) &&
+                   nfs4_stateid_match(&data->args.stateid,
                                             &lo->plh_stateid)) {
                        LIST_HEAD(head);
 
@@ -359,11 +371,23 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
                } else
                        spin_unlock(&inode->i_lock);
                break;
+       case -NFS4ERR_OLD_STATEID:
+               spin_lock(&inode->i_lock);
+               lo = NFS_I(inode)->layout;
+               if (pnfs_layout_is_valid(lo) &&
+                   nfs4_stateid_match_other(&data->args.stateid,
+                                       &lo->plh_stateid)) {
+                       /* Do we need to delay before resending? */
+                       if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+                                               &data->args.stateid))
+                               rpc_delay(task, HZ);
+                       rpc_restart_call_prepare(task);
+               }
+               spin_unlock(&inode->i_lock);
+               break;
        case -ENOTSUPP:
        case -EOPNOTSUPP:
                NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
-       default:
-               break;
        }
 
        dprintk("%s server returns %d\n", __func__, task->tk_status);
index 8d7d08d..cd3b7cf 100644
@@ -817,6 +817,11 @@ static int nfs4_set_client(struct nfs_server *server,
                goto error;
        }
 
+       if (server->nfs_client == clp) {
+               error = -ELOOP;
+               goto error;
+       }
+
        /*
         * Query for the lease time on clientid setup or renewal
         *
index 1949bbd..f5aecaa 100644
@@ -634,15 +634,11 @@ out_sleep:
 }
 EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
 
-static int nfs40_sequence_done(struct rpc_task *task,
-                              struct nfs4_sequence_res *res)
+static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
 {
        struct nfs4_slot *slot = res->sr_slot;
        struct nfs4_slot_table *tbl;
 
-       if (slot == NULL)
-               goto out;
-
        tbl = slot->table;
        spin_lock(&tbl->slot_tbl_lock);
        if (!nfs41_wake_and_assign_slot(tbl, slot))
@@ -650,7 +646,13 @@ static int nfs40_sequence_done(struct rpc_task *task,
        spin_unlock(&tbl->slot_tbl_lock);
 
        res->sr_slot = NULL;
-out:
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+                              struct nfs4_sequence_res *res)
+{
+       if (res->sr_slot != NULL)
+               nfs40_sequence_free_slot(res);
        return 1;
 }
 
@@ -666,6 +668,11 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
        tbl = slot->table;
        session = tbl->session;
 
+       /* Bump the slot sequence number */
+       if (slot->seq_done)
+               slot->seq_nr++;
+       slot->seq_done = 0;
+
        spin_lock(&tbl->slot_tbl_lock);
        /* Be nice to the server: try to ensure that the last transmitted
         * value for highest_user_slotid <= target_highest_slotid
@@ -686,9 +693,12 @@ out_unlock:
        res->sr_slot = NULL;
        if (send_new_highest_used_slotid)
                nfs41_notify_server(session->clp);
+       if (waitqueue_active(&tbl->slot_waitq))
+               wake_up_all(&tbl->slot_waitq);
 }
 
-int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+static int nfs41_sequence_process(struct rpc_task *task,
+               struct nfs4_sequence_res *res)
 {
        struct nfs4_session *session;
        struct nfs4_slot *slot = res->sr_slot;
@@ -714,7 +724,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
        switch (res->sr_status) {
        case 0:
                /* Update the slot's sequence and clientid lease timer */
-               ++slot->seq_nr;
+               slot->seq_done = 1;
                clp = session->clp;
                do_renew_lease(clp, res->sr_timestamp);
                /* Check sequence flags */
@@ -769,16 +779,16 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
                goto retry_nowait;
        default:
                /* Just update the slot sequence no. */
-               ++slot->seq_nr;
+               slot->seq_done = 1;
        }
 out:
        /* The session may be reset by one of the error handlers. */
        dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
-       nfs41_sequence_free_slot(res);
 out_noaction:
        return ret;
 retry_nowait:
        if (rpc_restart_call_prepare(task)) {
+               nfs41_sequence_free_slot(res);
                task->tk_status = 0;
                ret = 0;
        }
@@ -789,8 +799,37 @@ out_retry:
        rpc_delay(task, NFS4_POLL_RETRY_MAX);
        return 0;
 }
+
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+       if (!nfs41_sequence_process(task, res))
+               return 0;
+       if (res->sr_slot != NULL)
+               nfs41_sequence_free_slot(res);
+       return 1;
+
+}
 EXPORT_SYMBOL_GPL(nfs41_sequence_done);
 
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+       if (res->sr_slot == NULL)
+               return 1;
+       if (res->sr_slot->table->session != NULL)
+               return nfs41_sequence_process(task, res);
+       return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+       if (res->sr_slot != NULL) {
+               if (res->sr_slot->table->session != NULL)
+                       nfs41_sequence_free_slot(res);
+               else
+                       nfs40_sequence_free_slot(res);
+       }
+}
+
 int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
        if (res->sr_slot == NULL)
@@ -920,6 +959,17 @@ static int nfs4_setup_sequence(const struct nfs_server *server,
                                    args, res, task);
 }
 
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+       return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+       if (res->sr_slot != NULL)
+               nfs40_sequence_free_slot(res);
+}
+
 int nfs4_sequence_done(struct rpc_task *task,
                       struct nfs4_sequence_res *res)
 {
@@ -1197,6 +1247,7 @@ static void nfs4_opendata_free(struct kref *kref)
        struct super_block *sb = p->dentry->d_sb;
 
        nfs_free_seqid(p->o_arg.seqid);
+       nfs4_sequence_free_slot(&p->o_res.seq_res);
        if (p->state != NULL)
                nfs4_put_open_state(p->state);
        nfs4_put_state_owner(p->owner);
@@ -1656,9 +1707,14 @@ err:
 static struct nfs4_state *
 nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
+       struct nfs4_state *ret;
+
        if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
-               return _nfs4_opendata_reclaim_to_nfs4_state(data);
-       return _nfs4_opendata_to_nfs4_state(data);
+               ret = _nfs4_opendata_reclaim_to_nfs4_state(data);
+       else
+               ret = _nfs4_opendata_to_nfs4_state(data);
+       nfs4_sequence_free_slot(&data->o_res.seq_res);
+       return ret;
 }
 
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -2056,7 +2112,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 
        data->rpc_status = task->tk_status;
 
-       if (!nfs4_sequence_done(task, &data->o_res.seq_res))
+       if (!nfs4_sequence_process(task, &data->o_res.seq_res))
                return;
 
        if (task->tk_status == 0) {
@@ -7864,7 +7920,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        struct nfs4_layoutget *lgp = calldata;
 
        dprintk("--> %s\n", __func__);
-       nfs41_sequence_done(task, &lgp->res.seq_res);
+       nfs41_sequence_process(task, &lgp->res.seq_res);
        dprintk("<-- %s\n", __func__);
 }
 
@@ -8080,6 +8136,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
        /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
        if (status == 0 && lgp->res.layoutp->len)
                lseg = pnfs_layout_process(lgp);
+       nfs4_sequence_free_slot(&lgp->res.seq_res);
        rpc_put_task(task);
        dprintk("<-- %s status=%d\n", __func__, status);
        if (status)
@@ -8106,7 +8163,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 
        dprintk("--> %s\n", __func__);
 
-       if (!nfs41_sequence_done(task, &lrp->res.seq_res))
+       if (!nfs41_sequence_process(task, &lrp->res.seq_res))
                return;
 
        server = NFS_SERVER(lrp->args.inode);
@@ -8118,6 +8175,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
        case -NFS4ERR_DELAY:
                if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
                        break;
+               nfs4_sequence_free_slot(&lrp->res.seq_res);
                rpc_restart_call_prepare(task);
                return;
        }
@@ -8138,6 +8196,7 @@ static void nfs4_layoutreturn_release(void *calldata)
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
        pnfs_clear_layoutreturn_waitbit(lo);
        spin_unlock(&lo->plh_inode->i_lock);
+       nfs4_sequence_free_slot(&lrp->res.seq_res);
        pnfs_free_lseg_list(&freeme);
        pnfs_put_layout_hdr(lrp->args.layout);
        nfs_iput_and_deactive(lrp->inode);
index 332d06e..b629730 100644
@@ -28,6 +28,7 @@ static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
        tbl->highest_used_slotid = NFS4_NO_SLOT;
        spin_lock_init(&tbl->slot_tbl_lock);
        rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+       init_waitqueue_head(&tbl->slot_waitq);
        init_completion(&tbl->complete);
 }
 
@@ -172,6 +173,58 @@ struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
        return ERR_PTR(-E2BIG);
 }
 
+static int nfs4_slot_get_seqid(struct nfs4_slot_table  *tbl, u32 slotid,
+               u32 *seq_nr)
+       __must_hold(&tbl->slot_tbl_lock)
+{
+       struct nfs4_slot *slot;
+
+       slot = nfs4_lookup_slot(tbl, slotid);
+       if (IS_ERR(slot))
+               return PTR_ERR(slot);
+       *seq_nr = slot->seq_nr;
+       return 0;
+}
+
+/*
+ * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use
+ *
+ * Given a slot table, slot id and sequence number, determine if the
+ * RPC call in question is still in flight. This function is mainly
+ * intended for use by the callback channel.
+ */
+static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
+               u32 slotid, u32 seq_nr)
+{
+       u32 cur_seq;
+       bool ret = false;
+
+       spin_lock(&tbl->slot_tbl_lock);
+       if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 &&
+           cur_seq == seq_nr && test_bit(slotid, tbl->used_slots))
+               ret = true;
+       spin_unlock(&tbl->slot_tbl_lock);
+       return ret;
+}
+
+/*
+ * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete
+ *
+ * Given a slot table, slot id and sequence number, wait until the
+ * corresponding RPC call completes. This function is mainly
+ * intended for use by the callback channel.
+ */
+int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+               u32 slotid, u32 seq_nr,
+               unsigned long timeout)
+{
+       if (wait_event_timeout(tbl->slot_waitq,
+                       !nfs4_slot_seqid_in_use(tbl, slotid, seq_nr),
+                       timeout) == 0)
+               return -ETIMEDOUT;
+       return 0;
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
index 5b51298..f703b75 100644
@@ -21,7 +21,8 @@ struct nfs4_slot {
        unsigned long           generation;
        u32                     slot_nr;
        u32                     seq_nr;
-       unsigned int            interrupted : 1;
+       unsigned int            interrupted : 1,
+                               seq_done : 1;
 };
 
 /* Sessions */
@@ -36,6 +37,7 @@ struct nfs4_slot_table {
        unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
        spinlock_t      slot_tbl_lock;
        struct rpc_wait_queue   slot_tbl_waitq; /* allocators may wait here */
+       wait_queue_head_t       slot_waitq;     /* Completion wait on slot */
        u32             max_slots;              /* # slots in table */
        u32             max_slotid;             /* Max allowed slotid value */
        u32             highest_used_slotid;    /* sent to server on each SEQ.
@@ -78,6 +80,9 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
 extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+               u32 slotid, u32 seq_nr,
+               unsigned long timeout);
 extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
index 70806ca..6daf034 100644
@@ -1555,6 +1555,7 @@ pnfs_update_layout(struct inode *ino,
        }
 
 lookup_again:
+       nfs4_client_recover_expired_lease(clp);
        first = false;
        spin_lock(&ino->i_lock);
        lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
@@ -2510,7 +2511,6 @@ pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
 
        data->args.fh = NFS_FH(inode);
        data->args.inode = inode;
-       nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
        status = ld->prepare_layoutstats(&data->args);
        if (status)
                goto out_free;
index 18d446e..d396013 100644
@@ -923,6 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
 
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (data) {
+               data->timeo             = NFS_UNSPEC_TIMEO;
+               data->retrans           = NFS_UNSPEC_RETRANS;
                data->acregmin          = NFS_DEF_ACREGMIN;
                data->acregmax          = NFS_DEF_ACREGMAX;
                data->acdirmin          = NFS_DEF_ACDIRMIN;
@@ -1189,6 +1191,19 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option)
        return rc;
 }
 
+static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
+               unsigned long l_bound, unsigned long u_bound)
+{
+       int ret;
+
+       ret = nfs_get_option_ul(args, option);
+       if (ret != 0)
+               return ret;
+       if (*option < l_bound || *option > u_bound)
+               return -ERANGE;
+       return 0;
+}
+
 /*
  * Error-check and convert a string of mount options from user space into
  * a data structure.  The whole mount string is processed; bad options are
@@ -1352,12 +1367,12 @@ static int nfs_parse_mount_options(char *raw,
                        mnt->bsize = option;
                        break;
                case Opt_timeo:
-                       if (nfs_get_option_ul(args, &option) || option == 0)
+                       if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
                                goto out_invalid_value;
                        mnt->timeo = option;
                        break;
                case Opt_retrans:
-                       if (nfs_get_option_ul(args, &option) || option == 0)
+                       if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
                                goto out_invalid_value;
                        mnt->retrans = option;
                        break;
index 54e5d66..43fdc27 100644
@@ -80,6 +80,8 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
        }
 
        for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+               if (ovl_is_private_xattr(name))
+                       continue;
 retry:
                size = vfs_getxattr(old, name, value, value_size);
                if (size == -ERANGE)
index 12bcd07..1560fdc 100644
@@ -12,6 +12,8 @@
 #include <linux/xattr.h>
 #include <linux/security.h>
 #include <linux/cred.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include "overlayfs.h"
 
 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -186,6 +188,9 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
+       if (!hardlink && !IS_POSIXACL(udir))
+               stat->mode &= ~current_umask();
+
        inode_lock_nested(udir, I_MUTEX_PARENT);
        newdentry = lookup_one_len(dentry->d_name.name, upperdir,
                                   dentry->d_name.len);
@@ -335,6 +340,32 @@ out_free:
        return ret;
 }
 
+static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
+                            const struct posix_acl *acl)
+{
+       void *buffer;
+       size_t size;
+       int err;
+
+       if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
+               return 0;
+
+       size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+       buffer = kmalloc(size, GFP_KERNEL);
+       if (!buffer)
+               return -ENOMEM;
+
+       size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+       err = size;
+       if (err < 0)
+               goto out_free;
+
+       err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+out_free:
+       kfree(buffer);
+       return err;
+}
+
 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
                                    struct kstat *stat, const char *link,
                                    struct dentry *hardlink)
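
ovl_set_upper_acl() above uses the standard two-call sizing idiom: posix_acl_to_xattr() with a NULL buffer only reports the encoded size, and the second call fills a buffer of exactly that size. The idiom on its own, as a sketch with the error handling trimmed to the essentials:

    #include <linux/posix_acl_xattr.h>
    #include <linux/slab.h>
    #include <linux/user_namespace.h>

    static void *demo_encode_acl(const struct posix_acl *acl, size_t *out_size)
    {
            int size = posix_acl_to_xattr(NULL, acl, NULL, 0); /* size query */
            void *buffer;

            if (size <= 0)
                    return NULL;

            buffer = kmalloc(size, GFP_KERNEL);
            if (!buffer)
                    return NULL;

            size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
            if (size < 0) {
                    kfree(buffer);
                    return NULL;
            }

            *out_size = size;
            return buffer;
    }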
@@ -346,10 +377,18 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
        struct dentry *upper;
        struct dentry *newdentry;
        int err;
+       struct posix_acl *acl, *default_acl;
 
        if (WARN_ON(!workdir))
                return -EROFS;
 
+       if (!hardlink) {
+               err = posix_acl_create(dentry->d_parent->d_inode,
+                                      &stat->mode, &default_acl, &acl);
+               if (err)
+                       return err;
+       }
+
        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out;
@@ -384,6 +423,17 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
                if (err)
                        goto out_cleanup;
        }
+       if (!hardlink) {
+               err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
+                                       acl);
+               if (err)
+                       goto out_cleanup;
+
+               err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
+                                       default_acl);
+               if (err)
+                       goto out_cleanup;
+       }
 
        if (!hardlink && S_ISDIR(stat->mode)) {
                err = ovl_set_opaque(newdentry);
@@ -410,6 +460,10 @@ out_dput:
 out_unlock:
        unlock_rename(workdir, upperdir);
 out:
+       if (!hardlink) {
+               posix_acl_release(acl);
+               posix_acl_release(default_acl);
+       }
        return err;
 
 out_cleanup:
@@ -950,9 +1004,9 @@ const struct inode_operations ovl_dir_inode_operations = {
        .permission     = ovl_permission,
        .getattr        = ovl_dir_getattr,
        .setxattr       = generic_setxattr,
-       .getxattr       = ovl_getxattr,
+       .getxattr       = generic_getxattr,
        .listxattr      = ovl_listxattr,
-       .removexattr    = ovl_removexattr,
+       .removexattr    = generic_removexattr,
        .get_acl        = ovl_get_acl,
        .update_time    = ovl_update_time,
 };
index 1b885c1..c75625c 100644
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl.h>
 #include "overlayfs.h"
 
 static int ovl_copy_up_truncate(struct dentry *dentry)
@@ -191,32 +192,44 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
        return err;
 }
 
-static bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(const char *name)
 {
-#define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "."
-       return strncmp(name, OVL_XATTR_PRE_NAME,
-                      sizeof(OVL_XATTR_PRE_NAME) - 1) == 0;
+       return strncmp(name, OVL_XATTR_PREFIX,
+                      sizeof(OVL_XATTR_PREFIX) - 1) == 0;
 }
 
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
-                const char *name, const void *value,
-                size_t size, int flags)
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+                 size_t size, int flags)
 {
        int err;
-       struct dentry *upperdentry;
+       struct path realpath;
+       enum ovl_path_type type = ovl_path_real(dentry, &realpath);
        const struct cred *old_cred;
 
        err = ovl_want_write(dentry);
        if (err)
                goto out;
 
+       if (!value && !OVL_TYPE_UPPER(type)) {
+               err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+               if (err < 0)
+                       goto out_drop_write;
+       }
+
        err = ovl_copy_up(dentry);
        if (err)
                goto out_drop_write;
 
-       upperdentry = ovl_dentry_upper(dentry);
+       if (!OVL_TYPE_UPPER(type))
+               ovl_path_upper(dentry, &realpath);
+
        old_cred = ovl_override_creds(dentry->d_sb);
-       err = vfs_setxattr(upperdentry, name, value, size, flags);
+       if (value)
+               err = vfs_setxattr(realpath.dentry, name, value, size, flags);
+       else {
+               WARN_ON(flags != XATTR_REPLACE);
+               err = vfs_removexattr(realpath.dentry, name);
+       }
        revert_creds(old_cred);
 
 out_drop_write:
@@ -225,16 +238,13 @@ out:
        return err;
 }
 
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
-                    const char *name, void *value, size_t size)
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+                 void *value, size_t size)
 {
        struct dentry *realdentry = ovl_dentry_real(dentry);
        ssize_t res;
        const struct cred *old_cred;
 
-       if (ovl_is_private_xattr(name))
-               return -ENODATA;
-
        old_cred = ovl_override_creds(dentry->d_sb);
        res = vfs_getxattr(realdentry, name, value, size);
        revert_creds(old_cred);
@@ -245,7 +255,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 {
        struct dentry *realdentry = ovl_dentry_real(dentry);
        ssize_t res;
-       int off;
+       size_t len;
+       char *s;
        const struct cred *old_cred;
 
        old_cred = ovl_override_creds(dentry->d_sb);
@@ -255,73 +266,39 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
                return res;
 
        /* filter out private xattrs */
-       for (off = 0; off < res;) {
-               char *s = list + off;
-               size_t slen = strlen(s) + 1;
+       for (s = list, len = res; len;) {
+               size_t slen = strnlen(s, len) + 1;
 
-               BUG_ON(off + slen > res);
+               /* underlying fs providing us with a broken xattr list? */
+               if (WARN_ON(slen > len))
+                       return -EIO;
 
+               len -= slen;
                if (ovl_is_private_xattr(s)) {
                        res -= slen;
-                       memmove(s, s + slen, res - off);
+                       memmove(s, s + slen, len);
                } else {
-                       off += slen;
+                       s += slen;
                }
        }
 
        return res;
 }
 
-int ovl_removexattr(struct dentry *dentry, const char *name)
-{
-       int err;
-       struct path realpath;
-       enum ovl_path_type type = ovl_path_real(dentry, &realpath);
-       const struct cred *old_cred;
-
-       err = ovl_want_write(dentry);
-       if (err)
-               goto out;
-
-       err = -ENODATA;
-       if (ovl_is_private_xattr(name))
-               goto out_drop_write;
-
-       if (!OVL_TYPE_UPPER(type)) {
-               err = vfs_getxattr(realpath.dentry, name, NULL, 0);
-               if (err < 0)
-                       goto out_drop_write;
-
-               err = ovl_copy_up(dentry);
-               if (err)
-                       goto out_drop_write;
-
-               ovl_path_upper(dentry, &realpath);
-       }
-
-       old_cred = ovl_override_creds(dentry->d_sb);
-       err = vfs_removexattr(realpath.dentry, name);
-       revert_creds(old_cred);
-out_drop_write:
-       ovl_drop_write(dentry);
-out:
-       return err;
-}
-
 struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 {
        struct inode *realinode = ovl_inode_real(inode, NULL);
        const struct cred *old_cred;
        struct posix_acl *acl;
 
-       if (!IS_POSIXACL(realinode))
+       if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
                return NULL;
 
        if (!realinode->i_op->get_acl)
                return NULL;
 
        old_cred = ovl_override_creds(inode->i_sb);
-       acl = realinode->i_op->get_acl(realinode, type);
+       acl = get_acl(realinode, type);
        revert_creds(old_cred);
 
        return acl;
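
The rewritten filter loop in ovl_listxattr() is worth seeing on its own: strnlen() bounds every name by the bytes that remain, so an unterminated list from a buggy lower filesystem now yields WARN_ON() plus -EIO instead of the old BUG_ON(), while memmove() compacts the surviving names in place. A self-contained version assuming the same semantics:

    #include <linux/errno.h>
    #include <linux/string.h>
    #include <linux/types.h>

    static ssize_t demo_filter_names(char *list, ssize_t res,
                                     bool (*is_private)(const char *name))
    {
            size_t len;
            char *s;

            for (s = list, len = res; len;) {
                    size_t slen = strnlen(s, len) + 1;

                    if (slen > len)         /* name ran past the buffer */
                            return -EIO;

                    len -= slen;
                    if (is_private(s)) {
                            res -= slen;    /* shrink the reported size */
                            memmove(s, s + slen, len);
                    } else {
                            s += slen;
                    }
            }
            return res;
    }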
@@ -391,9 +368,9 @@ static const struct inode_operations ovl_file_inode_operations = {
        .permission     = ovl_permission,
        .getattr        = ovl_getattr,
        .setxattr       = generic_setxattr,
-       .getxattr       = ovl_getxattr,
+       .getxattr       = generic_getxattr,
        .listxattr      = ovl_listxattr,
-       .removexattr    = ovl_removexattr,
+       .removexattr    = generic_removexattr,
        .get_acl        = ovl_get_acl,
        .update_time    = ovl_update_time,
 };
@@ -404,9 +381,9 @@ static const struct inode_operations ovl_symlink_inode_operations = {
        .readlink       = ovl_readlink,
        .getattr        = ovl_getattr,
        .setxattr       = generic_setxattr,
-       .getxattr       = ovl_getxattr,
+       .getxattr       = generic_getxattr,
        .listxattr      = ovl_listxattr,
-       .removexattr    = ovl_removexattr,
+       .removexattr    = generic_removexattr,
        .update_time    = ovl_update_time,
 };
 
@@ -415,6 +392,9 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode)
        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
        inode->i_flags |= S_NOCMTIME;
+#ifdef CONFIG_FS_POSIX_ACL
+       inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
+#endif
 
        mode &= S_IFMT;
        switch (mode) {
index e4f5c95..5813ccf 100644
@@ -24,8 +24,8 @@ enum ovl_path_type {
        (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
 
 
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay"
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque"
+#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
+#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 
 #define OVL_ISUPPER_MASK 1UL
 
@@ -179,20 +179,21 @@ int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
 void ovl_cache_free(struct list_head *list);
 int ovl_check_d_type_supported(struct path *realpath);
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+                        struct dentry *dentry, int level);
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_permission(struct inode *inode, int mask);
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
-                const char *name, const void *value,
-                size_t size, int flags);
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
-                    const char *name, void *value, size_t size);
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+                 size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+                 void *value, size_t size);
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-int ovl_removexattr(struct dentry *dentry, const char *name);
 struct posix_acl *ovl_get_acl(struct inode *inode, int type);
 int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
 int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
+bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode);
 struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
index cf37fc7..f241b4e 100644
@@ -248,7 +248,7 @@ static inline int ovl_dir_read(struct path *realpath,
                        err = rdd->err;
        } while (!err && rdd->count);
 
-       if (!err && rdd->first_maybe_whiteout)
+       if (!err && rdd->first_maybe_whiteout && rdd->dentry)
                err = ovl_check_whiteouts(realpath->dentry, rdd);
 
        fput(realfile);
@@ -606,3 +606,64 @@ int ovl_check_d_type_supported(struct path *realpath)
 
        return rdd.d_type_supported;
 }
+
+static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+{
+       int err;
+       struct inode *dir = path->dentry->d_inode;
+       LIST_HEAD(list);
+       struct ovl_cache_entry *p;
+       struct ovl_readdir_data rdd = {
+               .ctx.actor = ovl_fill_merge,
+               .dentry = NULL,
+               .list = &list,
+               .root = RB_ROOT,
+               .is_lowest = false,
+       };
+
+       err = ovl_dir_read(path, &rdd);
+       if (err)
+               goto out;
+
+       inode_lock_nested(dir, I_MUTEX_PARENT);
+       list_for_each_entry(p, &list, l_node) {
+               struct dentry *dentry;
+
+               if (p->name[0] == '.') {
+                       if (p->len == 1)
+                               continue;
+                       if (p->len == 2 && p->name[1] == '.')
+                               continue;
+               }
+               dentry = lookup_one_len(p->name, path->dentry, p->len);
+               if (IS_ERR(dentry))
+                       continue;
+               if (dentry->d_inode)
+                       ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+               dput(dentry);
+       }
+       inode_unlock(dir);
+out:
+       ovl_cache_free(&list);
+}
+
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+                        struct dentry *dentry, int level)
+{
+       int err;
+
+       if (!d_is_dir(dentry) || level > 1) {
+               ovl_cleanup(dir, dentry);
+               return;
+       }
+
+       err = ovl_do_rmdir(dir, dentry);
+       if (err) {
+               struct path path = { .mnt = mnt, .dentry = dentry };
+
+               inode_unlock(dir);
+               ovl_workdir_cleanup_recurse(&path, level + 1);
+               inode_lock_nested(dir, I_MUTEX_PARENT);
+               ovl_cleanup(dir, dentry);
+       }
+}
index 4036132..e2a94a2 100644
@@ -814,6 +814,10 @@ retry:
                struct kstat stat = {
                        .mode = S_IFDIR | 0,
                };
+               struct iattr attr = {
+                       .ia_valid = ATTR_MODE,
+                       .ia_mode = stat.mode,
+               };
 
                if (work->d_inode) {
                        err = -EEXIST;
@@ -821,7 +825,7 @@ retry:
                                goto out_dput;
 
                        retried = true;
-                       ovl_cleanup(dir, work);
+                       ovl_workdir_cleanup(dir, mnt, work, 0);
                        dput(work);
                        goto retry;
                }
@@ -829,6 +833,21 @@ retry:
                err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
                if (err)
                        goto out_dput;
+
+               err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+               if (err && err != -ENODATA && err != -EOPNOTSUPP)
+                       goto out_dput;
+
+               err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+               if (err && err != -ENODATA && err != -EOPNOTSUPP)
+                       goto out_dput;
+
+               /* Clear any inherited mode bits */
+               inode_lock(work->d_inode);
+               err = notify_change(work, &attr, NULL);
+               inode_unlock(work->d_inode);
+               if (err)
+                       goto out_dput;
        }
 out_unlock:
        inode_unlock(dir);
@@ -967,10 +986,19 @@ static unsigned int ovl_split_lowerdirs(char *str)
        return ctr;
 }
 
-static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
-                                  struct dentry *dentry, struct inode *inode,
-                                  const char *name, const void *value,
-                                  size_t size, int flags)
+static int __maybe_unused
+ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+                       struct dentry *dentry, struct inode *inode,
+                       const char *name, void *buffer, size_t size)
+{
+       return ovl_xattr_get(dentry, handler->name, buffer, size);
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+                       struct dentry *dentry, struct inode *inode,
+                       const char *name, const void *value,
+                       size_t size, int flags)
 {
        struct dentry *workdir = ovl_workdir(dentry);
        struct inode *realinode = ovl_inode_real(inode, NULL);
@@ -998,19 +1026,22 @@ static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 
        posix_acl_release(acl);
 
-       return ovl_setxattr(dentry, inode, handler->name, value, size, flags);
+       err = ovl_xattr_set(dentry, handler->name, value, size, flags);
+       if (!err)
+               ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+
+       return err;
 
 out_acl_release:
        posix_acl_release(acl);
        return err;
 }
 
-static int ovl_other_xattr_set(const struct xattr_handler *handler,
-                              struct dentry *dentry, struct inode *inode,
-                              const char *name, const void *value,
-                              size_t size, int flags)
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+                            struct dentry *dentry, struct inode *inode,
+                            const char *name, void *buffer, size_t size)
 {
-       return ovl_setxattr(dentry, inode, name, value, size, flags);
+       return -EPERM;
 }
 
 static int ovl_own_xattr_set(const struct xattr_handler *handler,
@@ -1021,42 +1052,59 @@ static int ovl_own_xattr_set(const struct xattr_handler *handler,
        return -EPERM;
 }
 
-static const struct xattr_handler ovl_posix_acl_access_xattr_handler = {
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+                              struct dentry *dentry, struct inode *inode,
+                              const char *name, void *buffer, size_t size)
+{
+       return ovl_xattr_get(dentry, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+                              struct dentry *dentry, struct inode *inode,
+                              const char *name, const void *value,
+                              size_t size, int flags)
+{
+       return ovl_xattr_set(dentry, name, value, size, flags);
+}
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_access_xattr_handler = {
        .name = XATTR_NAME_POSIX_ACL_ACCESS,
        .flags = ACL_TYPE_ACCESS,
+       .get = ovl_posix_acl_xattr_get,
        .set = ovl_posix_acl_xattr_set,
 };
 
-static const struct xattr_handler ovl_posix_acl_default_xattr_handler = {
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_default_xattr_handler = {
        .name = XATTR_NAME_POSIX_ACL_DEFAULT,
        .flags = ACL_TYPE_DEFAULT,
+       .get = ovl_posix_acl_xattr_get,
        .set = ovl_posix_acl_xattr_set,
 };
 
 static const struct xattr_handler ovl_own_xattr_handler = {
        .prefix = OVL_XATTR_PREFIX,
+       .get = ovl_own_xattr_get,
        .set = ovl_own_xattr_set,
 };
 
 static const struct xattr_handler ovl_other_xattr_handler = {
        .prefix = "", /* catch all */
+       .get = ovl_other_xattr_get,
        .set = ovl_other_xattr_set,
 };
 
 static const struct xattr_handler *ovl_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
        &ovl_posix_acl_access_xattr_handler,
        &ovl_posix_acl_default_xattr_handler,
+#endif
        &ovl_own_xattr_handler,
        &ovl_other_xattr_handler,
        NULL
 };
 
-static const struct xattr_handler *ovl_xattr_noacl_handlers[] = {
-       &ovl_own_xattr_handler,
-       &ovl_other_xattr_handler,
-       NULL,
-};
-
 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct path upperpath = { NULL, NULL };
@@ -1132,7 +1180,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        err = -EINVAL;
        stacklen = ovl_split_lowerdirs(lowertmp);
        if (stacklen > OVL_MAX_STACK) {
-               pr_err("overlayfs: too many lower directries, limit is %d\n",
+               pr_err("overlayfs: too many lower directories, limit is %d\n",
                       OVL_MAX_STACK);
                goto out_free_lowertmp;
        } else if (!ufs->config.upperdir && stacklen == 1) {
@@ -1269,10 +1317,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
        sb->s_op = &ovl_super_operations;
-       if (IS_ENABLED(CONFIG_FS_POSIX_ACL))
-               sb->s_xattr = ovl_xattr_handlers;
-       else
-               sb->s_xattr = ovl_xattr_noacl_handlers;
+       sb->s_xattr = ovl_xattr_handlers;
        sb->s_root = root_dentry;
        sb->s_fs_info = ufs;
        sb->s_flags |= MS_POSIXACL;
index 54e2702..ac0df4d 100644
@@ -1556,18 +1556,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
 static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 {
        struct task_struct *task;
-       struct mm_struct *mm;
        struct file *exe_file;
 
        task = get_proc_task(d_inode(dentry));
        if (!task)
                return -ENOENT;
-       mm = get_task_mm(task);
+       exe_file = get_task_exe_file(task);
        put_task_struct(task);
-       if (!mm)
-               return -ENOENT;
-       exe_file = get_mm_exe_file(mm);
-       mmput(mm);
        if (exe_file) {
                *exe_path = exe_file->f_path;
                path_get(&exe_file->f_path);
index 187d84e..f6fa99e 100644
@@ -581,6 +581,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                mss->anonymous_thp += HPAGE_PMD_SIZE;
        else if (PageSwapBacked(page))
                mss->shmem_thp += HPAGE_PMD_SIZE;
+       else if (is_zone_device_page(page))
+               /* pass */;
        else
                VM_BUG_ON_PAGE(1, page);
        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
index 19f532e..6dc4296 100644
@@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
                size -= n;
                buf += n;
                copied += n;
-               if (!m->count)
+               if (!m->count) {
+                       m->from = 0;
                        m->index++;
+               }
                if (!size)
                        goto Done;
        }
index f35523d..b803213 100644
@@ -114,9 +114,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
         * If buf != of->prealloc_buf, we don't know how
         * large it is, so cannot safely pass it to ->show
         */
-       if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
+       if (WARN_ON_ONCE(buf != of->prealloc_buf))
                return 0;
        len = ops->show(kobj, of->kn->priv, buf);
+       if (pos) {
+               if (len <= pos)
+                       return 0;
+               len -= pos;
+               memmove(buf, buf + pos, len);
+       }
        return min(count, len);
 }
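
The offset handling above re-runs ->show() and shifts the preallocated buffer, so a read at nonzero *ppos returns the tail of the output instead of zero bytes. A hypothetical user-space illustration (the attribute path is only a stand-in, and only attributes served from a preallocated buffer take this code path):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/sys/kernel/uevent_seqnum", O_RDONLY);

	if (fd < 0)
		return 1;
	lseek(fd, 2, SEEK_SET);		/* skip the first two bytes */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n >= 0) {
		buf[n] = '\0';
		printf("tail: %s", buf);
	}
	close(fd);
	return 0;
}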
 
index b45345d..51157da 100644
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
 
        p = c->gap_lebs;
        do {
-               ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
+               ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
                written = layout_leb_in_gaps(c, p);
                if (written < 0) {
                        err = written;
index e237811..11a0041 100644
@@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler,
        dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
                inode->i_ino, dentry, size);
 
-       return  __ubifs_getxattr(inode, name, buffer, size);
+       name = xattr_full_name(handler, name);
+       return __ubifs_getxattr(inode, name, buffer, size);
 }
 
 static int ubifs_xattr_set(const struct xattr_handler *handler,
@@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler,
        dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
                name, inode->i_ino, dentry, size);
 
+       name = xattr_full_name(handler, name);
+
        if (value)
                return __ubifs_setxattr(inode, name, value, size, flags);
        else
index 776ae2f..05b5243 100644
@@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small(
        xfs_extlen_t    *flenp, /* result length */
        int             *stat)  /* status: 0-freelist, 1-normal/none */
 {
+       struct xfs_owner_info   oinfo;
        int             error;
        xfs_agblock_t   fbno;
        xfs_extlen_t    flen;
@@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small(
                                error0);
                        args->wasfromfl = 1;
                        trace_xfs_alloc_small_freelist(args);
+
+                       /*
+                        * If we're feeding an AGFL block to something that
+                        * doesn't live in the free space, we need to clear
+                        * out the OWN_AG rmap.
+                        */
+                       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+                       error = xfs_rmap_free(args->tp, args->agbp, args->agno,
+                                       fbno, 1, &oinfo);
+                       if (error)
+                               goto error0;
+
                        *stat = 0;
                        return 0;
                }
@@ -2264,6 +2277,9 @@ xfs_alloc_log_agf(
                offsetof(xfs_agf_t, agf_longest),
                offsetof(xfs_agf_t, agf_btreeblks),
                offsetof(xfs_agf_t, agf_uuid),
+               offsetof(xfs_agf_t, agf_rmap_blocks),
+               /* needed so that we don't log the whole rest of the structure: */
+               offsetof(xfs_agf_t, agf_spare64),
                sizeof(xfs_agf_t)
        };
 
index b5c213a..0856979 100644
@@ -1814,6 +1814,10 @@ xfs_btree_lookup(
 
        XFS_BTREE_STATS_INC(cur, lookup);
 
+       /* No such thing as a zero-level tree. */
+       if (cur->bc_nlevels == 0)
+               return -EFSCORRUPTED;
+
        block = NULL;
        keyno = 0;
 
@@ -4554,15 +4558,22 @@ xfs_btree_simple_query_range(
        if (error)
                goto out;
 
+       /* Nothing?  See if there's anything to the right. */
+       if (!stat) {
+               error = xfs_btree_increment(cur, 0, &stat);
+               if (error)
+                       goto out;
+       }
+
        while (stat) {
                /* Find the record. */
                error = xfs_btree_get_rec(cur, &recp, &stat);
                if (error || !stat)
                        break;
-               cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
 
                /* Skip if high_key(rec) < low_key. */
                if (firstrec) {
+                       cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
                        firstrec = false;
                        diff = cur->bc_ops->diff_two_keys(cur, low_key,
                                        &rec_key);
@@ -4571,6 +4582,7 @@ xfs_btree_simple_query_range(
                }
 
                /* Stop if high_key < low_key(rec). */
+               cur->bc_ops->init_key_from_rec(&rec_key, recp);
                diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
                if (diff > 0)
                        break;
index 054a203..c221d0e 100644
@@ -194,7 +194,7 @@ xfs_defer_trans_abort(
        /* Abort intent items. */
        list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
                trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
-               if (dfp->dfp_committed)
+               if (!dfp->dfp_done)
                        dfp->dfp_type->abort_intent(dfp->dfp_intent);
        }
 
@@ -290,7 +290,6 @@ xfs_defer_finish(
        struct xfs_defer_pending        *dfp;
        struct list_head                *li;
        struct list_head                *n;
-       void                            *done_item = NULL;
        void                            *state;
        int                             error = 0;
        void                            (*cleanup_fn)(struct xfs_trans *, void *, int);
@@ -309,19 +308,11 @@ xfs_defer_finish(
                if (error)
                        goto out;
 
-               /* Mark all pending intents as committed. */
-               list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
-                       if (dfp->dfp_committed)
-                               break;
-                       trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
-                       dfp->dfp_committed = true;
-               }
-
                /* Log an intent-done item for the first pending item. */
                dfp = list_first_entry(&dop->dop_pending,
                                struct xfs_defer_pending, dfp_list);
                trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
-               done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
+               dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
                                dfp->dfp_count);
                cleanup_fn = dfp->dfp_type->finish_cleanup;
 
@@ -331,7 +322,7 @@ xfs_defer_finish(
                        list_del(li);
                        dfp->dfp_count--;
                        error = dfp->dfp_type->finish_item(*tp, dop, li,
-                                       done_item, &state);
+                                       dfp->dfp_done, &state);
                        if (error) {
                                /*
                                 * Clean up after ourselves and jump out.
@@ -428,8 +419,8 @@ xfs_defer_add(
                dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
                                KM_SLEEP | KM_NOFS);
                dfp->dfp_type = defer_op_types[type];
-               dfp->dfp_committed = false;
                dfp->dfp_intent = NULL;
+               dfp->dfp_done = NULL;
                dfp->dfp_count = 0;
                INIT_LIST_HEAD(&dfp->dfp_work);
                list_add_tail(&dfp->dfp_list, &dop->dop_intake);
index cc3981c..e96533d 100644
@@ -30,8 +30,8 @@ struct xfs_defer_op_type;
 struct xfs_defer_pending {
        const struct xfs_defer_op_type  *dfp_type;      /* function pointers */
        struct list_head                dfp_list;       /* pending items */
-       bool                            dfp_committed;  /* committed trans? */
        void                            *dfp_intent;    /* log intent item */
+       void                            *dfp_done;      /* log done item */
        struct list_head                dfp_work;       /* work items */
        unsigned int                    dfp_count;      /* # extent items */
 };
index f814d42..270fb5c 100644
@@ -640,12 +640,15 @@ typedef struct xfs_agf {
        __be32          agf_btreeblks;  /* # of blocks held in AGF btrees */
        uuid_t          agf_uuid;       /* uuid of filesystem */
 
+       __be32          agf_rmap_blocks;        /* rmapbt blocks used */
+       __be32          agf_padding;            /* padding */
+
        /*
         * reserve some contiguous space for future logged fields before we add
         * the unlogged fields. This makes the range logging via flags and
         * structure offsets much simpler.
         */
-       __be64          agf_spare64[16];
+       __be64          agf_spare64[15];
 
        /* unlogged fields, written during buffer writeback. */
        __be64          agf_lsn;        /* last write sequence */
@@ -670,7 +673,9 @@ typedef struct xfs_agf {
 #define        XFS_AGF_LONGEST         0x00000400
 #define        XFS_AGF_BTREEBLKS       0x00000800
 #define        XFS_AGF_UUID            0x00001000
-#define        XFS_AGF_NUM_BITS        13
+#define        XFS_AGF_RMAP_BLOCKS     0x00002000
+#define        XFS_AGF_SPARE64         0x00004000
+#define        XFS_AGF_NUM_BITS        15
 #define        XFS_AGF_ALL_BITS        ((1 << XFS_AGF_NUM_BITS) - 1)
 
 #define XFS_AGF_FLAGS \
@@ -686,7 +691,9 @@ typedef struct xfs_agf {
        { XFS_AGF_FREEBLKS,     "FREEBLKS" }, \
        { XFS_AGF_LONGEST,      "LONGEST" }, \
        { XFS_AGF_BTREEBLKS,    "BTREEBLKS" }, \
-       { XFS_AGF_UUID,         "UUID" }
+       { XFS_AGF_UUID,         "UUID" }, \
+       { XFS_AGF_RMAP_BLOCKS,  "RMAP_BLOCKS" }, \
+       { XFS_AGF_SPARE64,      "SPARE64" }
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGF_DADDR(mp)      ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
index bc1faeb..17b8eeb 100644
@@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block(
        union xfs_btree_ptr     *new,
        int                     *stat)
 {
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
        int                     error;
        xfs_agblock_t           bno;
 
@@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block(
 
        xfs_trans_agbtree_delta(cur->bc_tp, 1);
        new->s = cpu_to_be32(bno);
+       be32_add_cpu(&agf->agf_rmap_blocks, 1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 
        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
        *stat = 1;
@@ -143,6 +147,8 @@ xfs_rmapbt_free_block(
        bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
        trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
                        bno, 1);
+       be32_add_cpu(&agf->agf_rmap_blocks, -1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
        error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
        if (error)
                return error;
index 0e3d4f5..4aecc5f 100644
@@ -583,7 +583,8 @@ xfs_sb_verify(
         * Only check the in progress field for the primary superblock as
         * mkfs.xfs doesn't clear it from secondary superblocks.
         */
-       return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+       return xfs_mount_validate_sb(mp, &sb,
+                                    bp->b_maps[0].bm_bn == XFS_SB_DADDR,
                                     check_version);
 }
 
index 47a318c..b5b9bff 100644
@@ -115,7 +115,6 @@ xfs_buf_ioacct_dec(
        if (!(bp->b_flags & _XBF_IN_FLIGHT))
                return;
 
-       ASSERT(bp->b_flags & XBF_ASYNC);
        bp->b_flags &= ~_XBF_IN_FLIGHT;
        percpu_counter_dec(&bp->b_target->bt_io_count);
 }
@@ -1612,7 +1611,7 @@ xfs_wait_buftarg(
         */
        while (percpu_counter_sum(&btp->bt_io_count))
                delay(100);
-       drain_workqueue(btp->bt_mount->m_buf_workqueue);
+       flush_workqueue(btp->bt_mount->m_buf_workqueue);
 
        /* loop until there is nothing left on the lru list. */
        while (list_lru_count(&btp->bt_lru)) {
index ed95e5b..e612a02 100644
@@ -741,9 +741,20 @@ xfs_file_dax_write(
         * page is inserted into the pagecache when we have to serve a write
         * fault on a hole.  It should never be dirtied and can simply be
         * dropped from the pagecache once we get real data for the page.
+        *
+        * XXX: This is racy against mmap, and there's nothing we can do about
+        * it. dax_do_io() should really do this invalidation internally as
+        * it will know if we've allocated over a hole for this specific IO and
+        * if so it needs to update the mapping tree and invalidate existing
+        * PTEs over the newly allocated range. Remove this invalidation when
+        * dax_do_io() is fixed up.
         */
        if (mapping->nrpages) {
-               ret = invalidate_inode_pages2(mapping);
+               loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
+
+               ret = invalidate_inode_pages2_range(mapping,
+                                                   iocb->ki_pos >> PAGE_SHIFT,
+                                                   end >> PAGE_SHIFT);
                WARN_ON_ONCE(ret);
        }
 
index 0f96847..0b7f986 100644
@@ -248,6 +248,7 @@ xfs_growfs_data_private(
                        agf->agf_roots[XFS_BTNUM_RMAPi] =
                                                cpu_to_be32(XFS_RMAP_BLOCK(mp));
                        agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+                       agf->agf_rmap_blocks = cpu_to_be32(1);
                }
 
                agf->agf_flfirst = cpu_to_be32(1);
index 2114d53..2af0dda 100644
@@ -715,12 +715,16 @@ xfs_iomap_write_allocate(
                 * is in the delayed allocation extent on which we sit
                 * but before our buffer starts.
                 */
-
                nimaps = 0;
                while (nimaps == 0) {
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-
-                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+                       /*
+                        * We have already reserved space for the extent and any
+                        * indirect blocks when creating the delalloc extent,
+                        * so there is no need to reserve space in this transaction
+                        * again.
+                        */
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
                                        0, XFS_TRANS_RESERVE, &tp);
                        if (error)
                                return error;
@@ -1037,20 +1041,14 @@ xfs_file_iomap_begin(
                        return error;
 
                trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
-       } else if (nimaps) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
        } else {
+               ASSERT(nimaps);
+
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
-               iomap->blkno = IOMAP_NULL_BLOCK;
-               iomap->type = IOMAP_HOLE;
-               iomap->offset = offset;
-               iomap->length = length;
+               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
        }
 
+       xfs_bmbt_to_iomap(ip, iomap, &imap);
        return 0;
 }
 
@@ -1112,3 +1110,48 @@ struct iomap_ops xfs_iomap_ops = {
        .iomap_begin            = xfs_file_iomap_begin,
        .iomap_end              = xfs_file_iomap_end,
 };
+
+static int
+xfs_xattr_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1, error = 0;
+       unsigned                lockmode;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lockmode = xfs_ilock_data_map_shared(ip);
+
+       /* if there is no attribute fork or no extents, return ENOENT */
+       if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+               error = -ENOENT;
+               goto out_unlock;
+       }
+
+       ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                              &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+
+       if (!error) {
+               ASSERT(nimaps);
+               xfs_bmbt_to_iomap(ip, iomap, &imap);
+       }
+
+       return error;
+}
+
+struct iomap_ops xfs_xattr_iomap_ops = {
+       .iomap_begin            = xfs_xattr_iomap_begin,
+};
index e066d04..fb8aca3 100644
@@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                struct xfs_bmbt_irec *);
 
 extern struct iomap_ops xfs_iomap_ops;
+extern struct iomap_ops xfs_xattr_iomap_ops;
 
 #endif /* __XFS_IOMAP_H__*/
index ab820f8..b24c310 100644
@@ -1009,7 +1009,14 @@ xfs_vn_fiemap(
        int                     error;
 
        xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
-       error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_xattr_iomap_ops);
+       } else {
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_iomap_ops);
+       }
        xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
        return error;
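
With this dispatch, the new attribute-fork mapping is reachable from userspace via the regular fiemap ioctl. A minimal sketch that only counts extents (hypothetical demo program, to be run against a file on XFS with the patch applied):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap fm;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	memset(&fm, 0, sizeof(fm));
	fm.fm_length = ~0ULL;			/* map the whole fork */
	fm.fm_flags = FIEMAP_FLAG_XATTR;	/* routed to xfs_xattr_iomap_ops */
	if (ioctl(fd, FS_IOC_FIEMAP, &fm) == 0)
		printf("xattr extents: %u\n", fm.fm_mapped_extents);
	close(fd);
	return 0;
}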
index 24ef83e..fd6be45 100644
@@ -1574,9 +1574,16 @@ xfs_fs_fill_super(
                }
        }
 
-       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+               if (mp->m_sb.sb_rblocks) {
+                       xfs_alert(mp,
+       "EXPERIMENTAL reverse mapping btree not compatible with realtime device!");
+                       error = -EINVAL;
+                       goto out_filestream_unmount;
+               }
                xfs_alert(mp,
        "EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+       }
 
        error = xfs_mountfs(mp);
        if (error)
index 551b7e2..d303a66 100644
@@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
 DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
 DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
        TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -2296,7 +2295,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
                __entry->dev = mp ? mp->m_super->s_dev : 0;
                __entry->type = dfp->dfp_type->type;
                __entry->intent = dfp->dfp_intent;
-               __entry->committed = dfp->dfp_committed;
+               __entry->committed = dfp->dfp_done != NULL;
                __entry->nr = dfp->dfp_count;
        ),
        TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
index 1bfa602..5dea1fb 100644
@@ -72,6 +72,7 @@ struct exception_table_entry
 /* Returns 0 if exception not found and fixup otherwise.  */
 extern unsigned long search_exception_table(unsigned long);
 
+
 /*
  * architectures with an MMU should override these two
  */
index 4d8452c..c5eaf2f 100644
@@ -1056,7 +1056,7 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev,
        return NULL;
 }
 
-#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \
+#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \
        static const void * __acpi_table_##name[]                       \
                __attribute__((unused))                                 \
                 = { (void *) table_id,                                 \
index ebd5c1f..c607fce 100644
@@ -23,6 +23,7 @@
 #define  BCMA_CLKCTLST_4328A0_HAVEALP  0x00020000 /* 4328a0 has reversed bits */
 
 /* Agent registers (common for every core) */
+#define BCMA_OOB_SEL_OUT_A30           0x0100
 #define BCMA_IOCTL                     0x0408 /* IO control */
 #define  BCMA_IOCTL_CLK                        0x0001
 #define  BCMA_IOCTL_FGC                        0x0002
index 59ffaa6..23ddf4b 100644
@@ -71,7 +71,8 @@ static inline bool bio_has_data(struct bio *bio)
 {
        if (bio &&
            bio->bi_iter.bi_size &&
-           bio_op(bio) != REQ_OP_DISCARD)
+           bio_op(bio) != REQ_OP_DISCARD &&
+           bio_op(bio) != REQ_OP_SECURE_ERASE)
                return true;
 
        return false;
@@ -79,7 +80,9 @@ static inline bool bio_has_data(struct bio *bio)
 
 static inline bool bio_no_advance_iter(struct bio *bio)
 {
-       return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME;
+       return bio_op(bio) == REQ_OP_DISCARD ||
+              bio_op(bio) == REQ_OP_SECURE_ERASE ||
+              bio_op(bio) == REQ_OP_WRITE_SAME;
 }
 
 static inline bool bio_is_rw(struct bio *bio)
@@ -199,6 +202,9 @@ static inline unsigned bio_segments(struct bio *bio)
        if (bio_op(bio) == REQ_OP_DISCARD)
                return 1;
 
+       if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+               return 1;
+
        if (bio_op(bio) == REQ_OP_WRITE_SAME)
                return 1;
 
index 2c210b6..e79055c 100644
@@ -882,7 +882,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
                                                     int op)
 {
-       if (unlikely(op == REQ_OP_DISCARD))
+       if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
                return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
        if (unlikely(op == REQ_OP_WRITE_SAME))
@@ -913,7 +913,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
        if (unlikely(rq->cmd_type != REQ_TYPE_FS))
                return q->limits.max_hw_sectors;
 
-       if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD))
+       if (!q->limits.chunk_sectors ||
+           req_op(rq) == REQ_OP_DISCARD ||
+           req_op(rq) == REQ_OP_SECURE_ERASE)
                return blk_queue_get_max_sectors(q, req_op(rq));
 
        return min(blk_max_size_offset(q, offset),
index 1113423..9a904f6 100644
@@ -297,6 +297,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
+static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
index e294939..573c5a1 100644
 #define __compiler_offsetof(a, b)                                      \
        __builtin_offsetof(a, b)
 
-#if GCC_VERSION >= 40100 && GCC_VERSION < 40600
+#if GCC_VERSION >= 40100
 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
 
  */
 #define asm_volatile_goto(x...)        do { asm goto(x); asm (""); } while (0)
 
-#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+/*
+ * sparse (__CHECKER__) pretends to be gcc, but can't do constant
+ * folding in __builtin_bswap*() (yet), so don't set these for it.
+ */
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
 #if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
 #if GCC_VERSION >= 40800
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
-#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */
 
 #if GCC_VERSION >= 50000
 #define KASAN_ABI_VERSION 4
index 1bb9548..436aa4e 100644
@@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * object's lifetime is managed by something other than RCU.  That
  * "something other" might be reference counting or simple immortality.
  *
- * The seemingly unused void * variable is to validate @p is indeed a pointer
- * type. All pointer types silently cast to void *.
+ * The seemingly unused size_t variable is to validate @p is indeed a pointer
+ * type by making sure it can be dereferenced.
  */
 #define lockless_dereference(p) \
 ({ \
        typeof(p) _________p1 = READ_ONCE(p); \
-       __maybe_unused const void * const _________p2 = _________p1; \
+       size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
 })
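
The sizeof(*(p)) expression rejects, at compile time, any argument that cannot be dereferenced. A trimmed stand-alone illustration with READ_ONCE() and the barrier elided (hypothetical, not kernel code; needs GCC statement expressions):

#include <stddef.h>

/* same type check as above; the memory-ordering machinery is elided */
#define lockless_dereference(p) \
({ \
	typeof(p) _________p1 = (p); \
	size_t __sz = sizeof(*(p)); \
	(void)__sz; \
	(_________p1); \
})

int main(void)
{
	int x = 42;
	int *gp = &x;
	int *p = lockless_dereference(gp);	/* fine: gp is a pointer */
	/* lockless_dereference(x) would not build: sizeof(*x) is invalid */
	return *p - 42;
}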
index 8cc719a..2ac6fa5 100644
@@ -49,8 +49,6 @@ struct fence_cb;
  * @timestamp: Timestamp when the fence was signaled.
  * @status: Optional, only valid if < 0, must be set before calling
  * fence_signal, indicates that the fence has completed with an error.
- * @child_list: list of children fences
- * @active_list: list of active fences
  *
  * the flags member must be manipulated and read using the appropriate
  * atomic ops (bit_*), so taking the spinlock will not be needed most
index a16439b..1f09c52 100644
@@ -314,6 +314,70 @@ struct bpf_prog_aux;
        bpf_size;                                               \
 })
 
+#define BPF_SIZEOF(type)                                       \
+       ({                                                      \
+               const int __size = bytes_to_bpf_size(sizeof(type)); \
+               BUILD_BUG_ON(__size < 0);                       \
+               __size;                                         \
+       })
+
+#define BPF_FIELD_SIZEOF(type, field)                          \
+       ({                                                      \
+               const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \
+               BUILD_BUG_ON(__size < 0);                       \
+               __size;                                         \
+       })
+
+#define __BPF_MAP_0(m, v, ...) v
+#define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
+#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
+#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__)
+#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__)
+#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__)
+
+#define __BPF_REG_0(...) __BPF_PAD(5)
+#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4)
+#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3)
+#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2)
+#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1)
+#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__)
+
+#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__)
+#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__)
+
+#define __BPF_CAST(t, a)                                                      \
+       (__force t)                                                            \
+       (__force                                                               \
+        typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long),      \
+                                     (unsigned long)0, (t)0))) a
+#define __BPF_V void
+#define __BPF_N
+
+#define __BPF_DECL_ARGS(t, a) t   a
+#define __BPF_DECL_REGS(t, a) u64 a
+
+#define __BPF_PAD(n)                                                          \
+       __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2,       \
+                 u64, __ur_3, u64, __ur_4, u64, __ur_5)
+
+#define BPF_CALL_x(x, name, ...)                                              \
+       static __always_inline                                                 \
+       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
+       u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));         \
+       u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))          \
+       {                                                                      \
+               return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+       }                                                                      \
+       static __always_inline                                                 \
+       u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
+
+#define BPF_CALL_0(name, ...)  BPF_CALL_x(0, name, __VA_ARGS__)
+#define BPF_CALL_1(name, ...)  BPF_CALL_x(1, name, __VA_ARGS__)
+#define BPF_CALL_2(name, ...)  BPF_CALL_x(2, name, __VA_ARGS__)
+#define BPF_CALL_3(name, ...)  BPF_CALL_x(3, name, __VA_ARGS__)
+#define BPF_CALL_4(name, ...)  BPF_CALL_x(4, name, __VA_ARGS__)
+#define BPF_CALL_5(name, ...)  BPF_CALL_x(5, name, __VA_ARGS__)
+
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
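
The BPF_CALL_x macros above let a helper body keep its natural C types while the BPF core sees the uniform u64-register signature. A trimmed user-space sketch of the two-argument expansion (hypothetical demo; the real __BPF_CAST is stricter about pointer and integer widths):

#include <stdio.h>

typedef unsigned long long u64;

/* user-space copies of the macros above, cut down to the 2-arg case */
#define __BPF_DECL_ARGS(t, a) t   a
#define __BPF_DECL_REGS(t, a) u64 a
#define __BPF_CAST(t, a)      (t)(unsigned long) a
#define __BPF_MAP_1(m, t, a)      m(t, a)
#define __BPF_MAP_2(m, t, a, ...) m(t, a), __BPF_MAP_1(m, __VA_ARGS__)

#define BPF_CALL_2(name, ...) \
	static inline u64 ____##name(__BPF_MAP_2(__BPF_DECL_ARGS, __VA_ARGS__)); \
	u64 name(__BPF_MAP_2(__BPF_DECL_REGS, __VA_ARGS__)) \
	{ \
		return ____##name(__BPF_MAP_2(__BPF_CAST, __VA_ARGS__)); \
	} \
	static inline u64 ____##name(__BPF_MAP_2(__BPF_DECL_ARGS, __VA_ARGS__))

/* the body sees (int *, int); callers see u64 demo_add(u64, u64) */
BPF_CALL_2(demo_add, int *, slot, int, delta)
{
	*slot += delta;
	return *slot;
}

int main(void)
{
	int v = 40;

	printf("%llu\n", demo_add((u64)(unsigned long)&v, 2)); /* prints 42 */
	return 0;
}

In-tree helpers are declared the same way, so type checking happens in the helper body while the interpreter only ever calls the u64 wrapper.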
index 3523bf6..901e25d 100644
@@ -574,6 +574,7 @@ static inline void mapping_allow_writable(struct address_space *mapping)
 
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
+#define ACL_DONT_CACHE ((void *)(-3))
 
 static inline struct posix_acl *
 uncached_acl_sentinel(struct task_struct *task)
index cfa6cde..76cff18 100644
@@ -274,8 +274,7 @@ extern void fscrypt_restore_control_page(struct page *);
 extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
                                                unsigned int);
 /* policy.c */
-extern int fscrypt_process_policy(struct inode *,
-                                       const struct fscrypt_policy *);
+extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
 extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
@@ -345,7 +344,7 @@ static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
 }
 
 /* policy.c */
-static inline int fscrypt_notsupp_process_policy(struct inode *i,
+static inline int fscrypt_notsupp_process_policy(struct file *f,
                                const struct fscrypt_policy *p)
 {
        return -EOPNOTSUPP;
index d2ba7d3..1ffbf2a 100644
@@ -304,6 +304,8 @@ struct tegra_mipi_device;
 
 struct tegra_mipi_device *tegra_mipi_request(struct device *device);
 void tegra_mipi_free(struct tegra_mipi_device *device);
+int tegra_mipi_enable(struct tegra_mipi_device *device);
+int tegra_mipi_disable(struct tegra_mipi_device *device);
 int tegra_mipi_calibrate(struct tegra_mipi_device *device);
 
 #endif
index b10954a..5df444b 100644
@@ -1114,6 +1114,13 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver,
                                         const char *mod_name);
 void vmbus_driver_unregister(struct hv_driver *hv_driver);
 
+static inline const char *vmbus_dev_name(const struct hv_device *device_obj)
+{
+       const struct kobject *kobj = &device_obj->device.kobj;
+
+       return kobj->name;
+}
+
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel);
 
 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
index dcb89e3..c6587c0 100644
@@ -45,6 +45,7 @@ struct br_ip_list {
 #define BR_PROXYARP            BIT(8)
 #define BR_LEARNING_SYNC       BIT(9)
 #define BR_PROXYARP_WIFI       BIT(10)
+#define BR_MCAST_FLOOD         BIT(11)
 
 #define BR_DEFAULT_AGEING_TIME (300 * HZ)
 
index a5f6ce6..3319d97 100644
@@ -81,6 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev)
 #define skb_vlan_tag_present(__skb)    ((__skb)->vlan_tci & VLAN_TAG_PRESENT)
 #define skb_vlan_tag_get(__skb)                ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
 #define skb_vlan_tag_get_id(__skb)     ((__skb)->vlan_tci & VLAN_VID_MASK)
+#define skb_vlan_tag_get_prio(__skb)   ((__skb)->vlan_tci & VLAN_PRIO_MASK)
 
 /**
  *     struct vlan_pcpu_stats - VLAN percpu rx/tx stats
@@ -271,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev)
 }
 #endif
 
+/**
+ * eth_type_vlan - check for valid vlan ether type.
+ * @ethertype: ether type to check
+ *
+ * Returns true if the ether type is a vlan ether type.
+ */
+static inline bool eth_type_vlan(__be16 ethertype)
+{
+       switch (ethertype) {
+       case htons(ETH_P_8021Q):
+       case htons(ETH_P_8021AD):
+               return true;
+       default:
+               return false;
+       }
+}
+
 static inline bool vlan_hw_offload_capable(netdev_features_t features,
                                           __be16 proto)
 {
@@ -424,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
 {
        struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
 
-       if (veth->h_vlan_proto != htons(ETH_P_8021Q) &&
-           veth->h_vlan_proto != htons(ETH_P_8021AD))
+       if (!eth_type_vlan(veth->h_vlan_proto))
                return -EINVAL;
 
        *vlan_tci = ntohs(veth->h_vlan_TCI);
@@ -487,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type,
         * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
         * ETH_HLEN otherwise
         */
-       if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+       if (eth_type_vlan(type)) {
                if (vlan_depth) {
                        if (WARN_ON(vlan_depth < VLAN_HLEN))
                                return 0;
@@ -505,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type,
                        vh = (struct vlan_hdr *)(skb->data + vlan_depth);
                        type = vh->h_vlan_encapsulated_proto;
                        vlan_depth += VLAN_HLEN;
-               } while (type == htons(ETH_P_8021Q) ||
-                        type == htons(ETH_P_8021AD));
+               } while (eth_type_vlan(type));
        }
 
        if (depth)
@@ -571,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 static inline bool skb_vlan_tagged(const struct sk_buff *skb)
 {
        if (!skb_vlan_tag_present(skb) &&
-           likely(skb->protocol != htons(ETH_P_8021Q) &&
-                  skb->protocol != htons(ETH_P_8021AD)))
+           likely(!eth_type_vlan(skb->protocol)))
                return false;
 
        return true;
@@ -592,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb)
        if (!skb_vlan_tag_present(skb)) {
                struct vlan_ethhdr *veh;
 
-               if (likely(protocol != htons(ETH_P_8021Q) &&
-                          protocol != htons(ETH_P_8021AD)))
+               if (likely(!eth_type_vlan(protocol)))
                        return false;
 
                veh = (struct vlan_ethhdr *)skb->data;
                protocol = veh->h_vlan_encapsulated_proto;
        }
 
-       if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+       if (!eth_type_vlan(protocol))
                return false;
 
        return true;
index 5198f8e..c97eab6 100644
@@ -62,7 +62,7 @@ void iio_swt_group_init_type_name(struct iio_sw_trigger *t,
                                  const char *name,
                                  struct config_item_type *type)
 {
-#ifdef CONFIG_CONFIGFS_FS
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
        config_group_init_type_name(&t->group, name, type);
 #endif
 }
index feb04ea..65da430 100644
@@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
                      struct user_namespace *user_ns,
                      u32 pid, u32 seq, u16 nlmsg_flags,
-                     const struct nlmsghdr *unlh);
+                     const struct nlmsghdr *unlh, bool net_admin);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
                         struct netlink_callback *cb,
                         const struct inet_diag_req_v2 *r,
@@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
                             struct inet_diag_msg *r, int ext,
-                            struct user_namespace *user_ns);
+                            struct user_namespace *user_ns, bool net_admin);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
index 3267df4..3d70ece 100644
@@ -18,6 +18,11 @@ struct vm_fault;
 #define IOMAP_MAPPED   0x03    /* blocks allocated @blkno */
 #define IOMAP_UNWRITTEN        0x04    /* blocks allocated @blkno in unwritten state */
 
+/*
+ * Flags for iomap mappings:
+ */
+#define IOMAP_F_MERGED 0x01    /* contains multiple blocks/extents */
+
 /*
  * Magic value for blkno:
  */
@@ -27,7 +32,8 @@ struct iomap {
        sector_t                blkno;  /* 1st sector of mapping, 512b units */
        loff_t                  offset; /* file offset of mapping, bytes */
        u64                     length; /* length of mapping, bytes */
-       int                     type;   /* type of mapping */
+       u16                     type;   /* type of mapping */
+       u16                     flags;  /* flags for mapping */
        struct block_device     *bdev;  /* block device for I/O */
 };
 
index 56b0b7e..99ac022 100644
  */
 #define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
 #define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
 #define E_ITS_MAPD_DEVICE_OOR                  0x010801
 #define E_ITS_MAPC_PROCNUM_OOR                 0x010902
index 4429d25..5e5b296 100644
@@ -195,6 +195,7 @@ static inline bool vma_migratable(struct vm_area_struct *vma)
 }
 
 extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+extern void mpol_put_task_policy(struct task_struct *);
 
 #else
 
@@ -297,5 +298,8 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
        return -1; /* no node preference */
 }
 
+static inline void mpol_put_task_policy(struct task_struct *task)
+{
+}
 #endif /* CONFIG_NUMA */
 #endif
diff --git a/include/linux/mfd/da8xx-cfgchip.h b/include/linux/mfd/da8xx-cfgchip.h
new file mode 100644
index 0000000..304985e
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * TI DaVinci DA8xx CHIPCFGx registers for syscon consumers.
+ *
+ * Copyright (C) 2016 David Lechner <david@lechnology.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_MFD_DA8XX_CFGCHIP_H
+#define __LINUX_MFD_DA8XX_CFGCHIP_H
+
+#include <linux/bitops.h>
+
+/* register offset (32-bit registers) */
+#define CFGCHIP(n)                             ((n) * 4)
+
+/* CFGCHIP0 (PLL0/EDMA3_0) register bits */
+#define CFGCHIP0_PLL_MASTER_LOCK               BIT(4)
+#define CFGCHIP0_EDMA30TC1DBS(n)               ((n) << 2)
+#define CFGCHIP0_EDMA30TC1DBS_MASK             CFGCHIP0_EDMA30TC1DBS(0x3)
+#define CFGCHIP0_EDMA30TC1DBS_16               CFGCHIP0_EDMA30TC1DBS(0x0)
+#define CFGCHIP0_EDMA30TC1DBS_32               CFGCHIP0_EDMA30TC1DBS(0x1)
+#define CFGCHIP0_EDMA30TC1DBS_64               CFGCHIP0_EDMA30TC1DBS(0x2)
+#define CFGCHIP0_EDMA30TC0DBS(n)               ((n) << 0)
+#define CFGCHIP0_EDMA30TC0DBS_MASK             CFGCHIP0_EDMA30TC0DBS(0x3)
+#define CFGCHIP0_EDMA30TC0DBS_16               CFGCHIP0_EDMA30TC0DBS(0x0)
+#define CFGCHIP0_EDMA30TC0DBS_32               CFGCHIP0_EDMA30TC0DBS(0x1)
+#define CFGCHIP0_EDMA30TC0DBS_64               CFGCHIP0_EDMA30TC0DBS(0x2)
+
+/* CFGCHIP1 (eCAP/HPI/EDMA3_1/eHRPWM TBCLK/McASP0 AMUTEIN) register bits */
+#define CFGCHIP1_CAP2SRC(n)                    ((n) << 27)
+#define CFGCHIP1_CAP2SRC_MASK                  CFGCHIP1_CAP2SRC(0x1f)
+#define CFGCHIP1_CAP2SRC_ECAP_PIN              CFGCHIP1_CAP2SRC(0x0)
+#define CFGCHIP1_CAP2SRC_MCASP0_TX             CFGCHIP1_CAP2SRC(0x1)
+#define CFGCHIP1_CAP2SRC_MCASP0_RX             CFGCHIP1_CAP2SRC(0x2)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_RX_THRESHOLD  CFGCHIP1_CAP2SRC(0x7)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_RX            CFGCHIP1_CAP2SRC(0x8)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_TX            CFGCHIP1_CAP2SRC(0x9)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_MISC          CFGCHIP1_CAP2SRC(0xa)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_RX_THRESHOLD  CFGCHIP1_CAP2SRC(0xb)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_RX            CFGCHIP1_CAP2SRC(0xc)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_TX            CFGCHIP1_CAP2SRC(0xd)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_MISC          CFGCHIP1_CAP2SRC(0xe)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_RX_THRESHOLD  CFGCHIP1_CAP2SRC(0xf)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_RX            CFGCHIP1_CAP2SRC(0x10)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_TX            CFGCHIP1_CAP2SRC(0x11)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_MISC          CFGCHIP1_CAP2SRC(0x12)
+#define CFGCHIP1_CAP1SRC(n)                    ((n) << 22)
+#define CFGCHIP1_CAP1SRC_MASK                  CFGCHIP1_CAP1SRC(0x1f)
+#define CFGCHIP1_CAP1SRC_ECAP_PIN              CFGCHIP1_CAP1SRC(0x0)
+#define CFGCHIP1_CAP1SRC_MCASP0_TX             CFGCHIP1_CAP1SRC(0x1)
+#define CFGCHIP1_CAP1SRC_MCASP0_RX             CFGCHIP1_CAP1SRC(0x2)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_RX_THRESHOLD  CFGCHIP1_CAP1SRC(0x7)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_RX            CFGCHIP1_CAP1SRC(0x8)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_TX            CFGCHIP1_CAP1SRC(0x9)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_MISC          CFGCHIP1_CAP1SRC(0xa)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_RX_THRESHOLD  CFGCHIP1_CAP1SRC(0xb)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_RX            CFGCHIP1_CAP1SRC(0xc)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_TX            CFGCHIP1_CAP1SRC(0xd)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_MISC          CFGCHIP1_CAP1SRC(0xe)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_RX_THRESHOLD  CFGCHIP1_CAP1SRC(0xf)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_RX            CFGCHIP1_CAP1SRC(0x10)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_TX            CFGCHIP1_CAP1SRC(0x11)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_MISC          CFGCHIP1_CAP1SRC(0x12)
+#define CFGCHIP1_CAP0SRC(n)                    ((n) << 17)
+#define CFGCHIP1_CAP0SRC_MASK                  CFGCHIP1_CAP0SRC(0x1f)
+#define CFGCHIP1_CAP0SRC_ECAP_PIN              CFGCHIP1_CAP0SRC(0x0)
+#define CFGCHIP1_CAP0SRC_MCASP0_TX             CFGCHIP1_CAP0SRC(0x1)
+#define CFGCHIP1_CAP0SRC_MCASP0_RX             CFGCHIP1_CAP0SRC(0x2)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_RX_THRESHOLD  CFGCHIP1_CAP0SRC(0x7)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_RX            CFGCHIP1_CAP0SRC(0x8)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_TX            CFGCHIP1_CAP0SRC(0x9)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_MISC          CFGCHIP1_CAP0SRC(0xa)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_RX_THRESHOLD  CFGCHIP1_CAP0SRC(0xb)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_RX            CFGCHIP1_CAP0SRC(0xc)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_TX            CFGCHIP1_CAP0SRC(0xd)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_MISC          CFGCHIP1_CAP0SRC(0xe)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_RX_THRESHOLD  CFGCHIP1_CAP0SRC(0xf)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_RX            CFGCHIP1_CAP0SRC(0x10)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_TX            CFGCHIP1_CAP0SRC(0x11)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_MISC          CFGCHIP1_CAP0SRC(0x12)
+#define CFGCHIP1_HPIBYTEAD                     BIT(16)
+#define CFGCHIP1_HPIENA                        BIT(15)
+#define CFGCHIP1_EDMA31TC0DBS(n)               ((n) << 13)
+#define CFGCHIP1_EDMA31TC0DBS_MASK             CFGCHIP1_EDMA31TC0DBS(0x3)
+#define CFGCHIP1_EDMA31TC0DBS_16               CFGCHIP1_EDMA31TC0DBS(0x0)
+#define CFGCHIP1_EDMA31TC0DBS_32               CFGCHIP1_EDMA31TC0DBS(0x1)
+#define CFGCHIP1_EDMA31TC0DBS_64               CFGCHIP1_EDMA31TC0DBS(0x2)
+#define CFGCHIP1_TBCLKSYNC                     BIT(12)
+#define CFGCHIP1_AMUTESEL0(n)                  ((n) << 0)
+#define CFGCHIP1_AMUTESEL0_MASK                CFGCHIP1_AMUTESEL0(0xf)
+#define CFGCHIP1_AMUTESEL0_LOW                 CFGCHIP1_AMUTESEL0(0x0)
+#define CFGCHIP1_AMUTESEL0_BANK_0              CFGCHIP1_AMUTESEL0(0x1)
+#define CFGCHIP1_AMUTESEL0_BANK_1              CFGCHIP1_AMUTESEL0(0x2)
+#define CFGCHIP1_AMUTESEL0_BANK_2              CFGCHIP1_AMUTESEL0(0x3)
+#define CFGCHIP1_AMUTESEL0_BANK_3              CFGCHIP1_AMUTESEL0(0x4)
+#define CFGCHIP1_AMUTESEL0_BANK_4              CFGCHIP1_AMUTESEL0(0x5)
+#define CFGCHIP1_AMUTESEL0_BANK_5              CFGCHIP1_AMUTESEL0(0x6)
+#define CFGCHIP1_AMUTESEL0_BANK_6              CFGCHIP1_AMUTESEL0(0x7)
+#define CFGCHIP1_AMUTESEL0_BANK_7              CFGCHIP1_AMUTESEL0(0x8)
+
+/* CFGCHIP2 (USB PHY) register bits */
+#define CFGCHIP2_PHYCLKGD                      BIT(17)
+#define CFGCHIP2_VBUSSENSE                     BIT(16)
+#define CFGCHIP2_RESET                         BIT(15)
+#define CFGCHIP2_OTGMODE(n)                    ((n) << 13)
+#define CFGCHIP2_OTGMODE_MASK                  CFGCHIP2_OTGMODE(0x3)
+#define CFGCHIP2_OTGMODE_NO_OVERRIDE           CFGCHIP2_OTGMODE(0x0)
+#define CFGCHIP2_OTGMODE_FORCE_HOST            CFGCHIP2_OTGMODE(0x1)
+#define CFGCHIP2_OTGMODE_FORCE_DEVICE          CFGCHIP2_OTGMODE(0x2)
+#define CFGCHIP2_OTGMODE_FORCE_HOST_VBUS_LOW   CFGCHIP2_OTGMODE(0x3)
+#define CFGCHIP2_USB1PHYCLKMUX                 BIT(12)
+#define CFGCHIP2_USB2PHYCLKMUX                 BIT(11)
+#define CFGCHIP2_PHYPWRDN                      BIT(10)
+#define CFGCHIP2_OTGPWRDN                      BIT(9)
+#define CFGCHIP2_DATPOL                        BIT(8)
+#define CFGCHIP2_USB1SUSPENDM                  BIT(7)
+#define CFGCHIP2_PHY_PLLON                     BIT(6)
+#define CFGCHIP2_SESENDEN                      BIT(5)
+#define CFGCHIP2_VBDTCTEN                      BIT(4)
+#define CFGCHIP2_REFFREQ(n)                    ((n) << 0)
+#define CFGCHIP2_REFFREQ_MASK                  CFGCHIP2_REFFREQ(0xf)
+#define CFGCHIP2_REFFREQ_12MHZ                 CFGCHIP2_REFFREQ(0x1)
+#define CFGCHIP2_REFFREQ_24MHZ                 CFGCHIP2_REFFREQ(0x2)
+#define CFGCHIP2_REFFREQ_48MHZ                 CFGCHIP2_REFFREQ(0x3)
+#define CFGCHIP2_REFFREQ_19_2MHZ               CFGCHIP2_REFFREQ(0x4)
+#define CFGCHIP2_REFFREQ_38_4MHZ               CFGCHIP2_REFFREQ(0x5)
+#define CFGCHIP2_REFFREQ_13MHZ                 CFGCHIP2_REFFREQ(0x6)
+#define CFGCHIP2_REFFREQ_26MHZ                 CFGCHIP2_REFFREQ(0x7)
+#define CFGCHIP2_REFFREQ_20MHZ                 CFGCHIP2_REFFREQ(0x8)
+#define CFGCHIP2_REFFREQ_40MHZ                 CFGCHIP2_REFFREQ(0x9)
+
+/* CFGCHIP3 (EMAC/uPP/PLL1/ASYNC3/PRU/DIV4.5/EMIFA) register bits */
+#define CFGCHIP3_RMII_SEL                      BIT(8)
+#define CFGCHIP3_UPP_TX_CLKSRC                 BIT(6)
+#define CFGCHIP3_PLL1_MASTER_LOCK              BIT(5)
+#define CFGCHIP3_ASYNC3_CLKSRC                 BIT(4)
+#define CFGCHIP3_PRUEVTSEL                     BIT(3)
+#define CFGCHIP3_DIV45PENA                     BIT(2)
+#define CFGCHIP3_EMA_CLKSRC                    BIT(1)
+
+/* CFGCHIP4 (McASP0 AMUTEIN) register bits */
+#define CFGCHIP4_AMUTECLR0                     BIT(0)
+
+#endif /* __LINUX_MFD_DA8XX_CFGCHIP_H */
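
As a usage sketch only (not part of this patch): these macros are meant for read-modify-write updates of the CFGCHIP words. Assuming the caller has ioremap()ed the DA8xx SYSCFG block to a hypothetical cfgchip_base, selecting a 32-byte default burst size for EDMA3_0 TC0 might look like this:

#include <linux/io.h>

/* Hypothetical driver context: cfgchip_base is assumed to have been
 * ioremap()ed over the DA8xx SYSCFG CFGCHIPn registers. */
static void __iomem *cfgchip_base;

static void da8xx_set_edma30_tc0_dbs32(void)
{
	u32 val = readl(cfgchip_base + CFGCHIP(0));

	val &= ~CFGCHIP0_EDMA30TC0DBS_MASK;	/* clear the 2-bit field */
	val |= CFGCHIP0_EDMA30TC0DBS_32;	/* program a 32-byte burst */
	writel(val, cfgchip_base + CFGCHIP(0));
}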
index 2567a87..7f55b8b 100644
 /*
  * time in us for processing a single channel, calculated as follows:
  *
- * num cycles = open delay + (sample delay + conv time) * averaging
+ * max num cycles = open delay + (sample delay + conv time) * averaging
  *
- * num cycles: 152 + (1 + 13) * 16 = 376
+ * max num cycles: 262143 + (255 + 13) * 16 = 266431
  *
  * clock frequency: 26MHz / 8 = 3.25MHz
  * clock period: 1 / 3.25MHz = 308ns
  *
- * processing time: 376 * 308ns = 116us
+ * max processing time: 266431 * 308ns ~= 82ms (rounded up to 83ms)
  */
-#define IDLE_TIMEOUT 116 /* microsec */
+#define IDLE_TIMEOUT 83 /* milliseconds */
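
The arithmetic in the new comment follows from the register field widths: open delay is an 18-bit field (max 262143 cycles), sample delay an 8-bit field (max 255), a conversion takes 13 cycles, and averaging can multiply by up to 16. A quick stand-alone check (illustrative only, not driver code):

#include <stdio.h>

int main(void)
{
	unsigned long cycles = 262143UL + (255 + 13) * 16;	/* = 266431 */
	double ms = cycles * 308e-9 * 1e3;			/* 308 ns/cycle */

	printf("%lu cycles -> %.1f ms\n", cycles, ms);		/* ~82.1 ms */
	return 0;	/* the driver rounds up to an 83 ms timeout */
}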
 
 #define TSCADC_CELLS           2
 
index 2566f6d..7c3c0d3 100644
@@ -170,12 +170,12 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
 int mlx5_init_cq_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                       struct mlx5_create_cq_mbox_in *in, int inlen);
+                       u32 *in, int inlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                      struct mlx5_query_cq_mbox_out *out);
+                      u32 *out, int outlen);
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-                       struct mlx5_modify_cq_mbox_in *in, int in_sz);
+                       u32 *in, int inlen);
 int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
                                   struct mlx5_core_cq *cq, u16 cq_period,
                                   u16 cq_max_count);
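
With the typed mailbox structs gone, callers size and fill the command inbox through the mlx5_ifc.h accessors instead. A minimal sketch of the new convention, with my_create_cq, pas, and npas as illustrative caller-side names (the core helper may also set the opcode itself):

static int my_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
			__be64 *pas, int npas)
{
	int inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * npas;
	u32 *in = mlx5_vzalloc(inlen);
	int err;

	if (!in)
		return -ENOMEM;
	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
	memcpy(MLX5_ADDR_OF(create_cq_in, in, pas), pas, sizeof(u64) * npas);
	err = mlx5_core_create_cq(dev, cq, in, inlen);
	kvfree(in);
	return err;
}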
index 0b6d15c..77c1417 100644
@@ -197,19 +197,6 @@ enum {
        MLX5_PCIE_CTRL_TPH_MASK         = 3 << 4,
 };
 
-enum {
-       MLX5_ACCESS_MODE_PA     = 0,
-       MLX5_ACCESS_MODE_MTT    = 1,
-       MLX5_ACCESS_MODE_KLM    = 2
-};
-
-enum {
-       MLX5_MKEY_REMOTE_INVAL  = 1 << 24,
-       MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
-       MLX5_MKEY_BSF_EN        = 1 << 30,
-       MLX5_MKEY_LEN64         = 1 << 31,
-};
-
 enum {
        MLX5_EN_RD      = (u64)1,
        MLX5_EN_WR      = (u64)2
@@ -411,33 +398,6 @@ enum {
        MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16
 };
 
-struct mlx5_inbox_hdr {
-       __be16          opcode;
-       u8              rsvd[4];
-       __be16          opmod;
-};
-
-struct mlx5_outbox_hdr {
-       u8              status;
-       u8              rsvd[3];
-       __be32          syndrome;
-};
-
-struct mlx5_cmd_query_adapter_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_cmd_query_adapter_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[24];
-       u8                      intapin;
-       u8                      rsvd1[13];
-       __be16                  vsd_vendor_id;
-       u8                      vsd[208];
-       u8                      vsd_psid[16];
-};
-
 enum mlx5_odp_transport_cap_bits {
        MLX5_ODP_SUPPORT_SEND    = 1 << 31,
        MLX5_ODP_SUPPORT_RECV    = 1 << 30,
@@ -455,30 +415,6 @@ struct mlx5_odp_caps {
        char reserved2[0xe4];
 };
 
-struct mlx5_cmd_init_hca_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd0[2];
-       __be16                  profile;
-       u8                      rsvd1[4];
-};
-
-struct mlx5_cmd_init_hca_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd0[2];
-       __be16                  profile;
-       u8                      rsvd1[4];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 struct mlx5_cmd_layout {
        u8              type;
        u8              rsvd0[3];
@@ -494,7 +430,6 @@ struct mlx5_cmd_layout {
        u8              status_own;
 };
 
-
 struct health_buffer {
        __be32          assert_var[5];
        __be32          rsvd0[3];
@@ -856,245 +791,15 @@ struct mlx5_cqe128 {
        struct mlx5_cqe64       cqe64;
 };
 
-struct mlx5_srq_ctx {
-       u8                      state_log_sz;
-       u8                      rsvd0[3];
-       __be32                  flags_xrcd;
-       __be32                  pgoff_cqn;
-       u8                      rsvd1[4];
-       u8                      log_pg_sz;
-       u8                      rsvd2[7];
-       __be32                  pd;
-       __be16                  lwm;
-       __be16                  wqe_cnt;
-       u8                      rsvd3[8];
-       __be64                  db_record;
-};
-
-struct mlx5_create_srq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  input_srqn;
-       u8                      rsvd0[4];
-       struct mlx5_srq_ctx     ctx;
-       u8                      rsvd1[208];
-       __be64                  pas[0];
-};
-
-struct mlx5_create_srq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  srqn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_destroy_srq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  srqn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_destroy_srq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_query_srq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  srqn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_query_srq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[8];
-       struct mlx5_srq_ctx     ctx;
-       u8                      rsvd1[32];
-       __be64                  pas[0];
-};
-
-struct mlx5_arm_srq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  srqn;
-       __be16                  rsvd;
-       __be16                  lwm;
-};
-
-struct mlx5_arm_srq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_cq_context {
-       u8                      status;
-       u8                      cqe_sz_flags;
-       u8                      st;
-       u8                      rsvd3;
-       u8                      rsvd4[6];
-       __be16                  page_offset;
-       __be32                  log_sz_usr_page;
-       __be16                  cq_period;
-       __be16                  cq_max_count;
-       __be16                  rsvd20;
-       __be16                  c_eqn;
-       u8                      log_pg_sz;
-       u8                      rsvd25[7];
-       __be32                  last_notified_index;
-       __be32                  solicit_producer_index;
-       __be32                  consumer_counter;
-       __be32                  producer_counter;
-       u8                      rsvd48[8];
-       __be64                  db_record_addr;
-};
-
-struct mlx5_create_cq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  input_cqn;
-       u8                      rsvdx[4];
-       struct mlx5_cq_context  ctx;
-       u8                      rsvd6[192];
-       __be64                  pas[0];
-};
-
-struct mlx5_create_cq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  cqn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_destroy_cq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  cqn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_destroy_cq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[8];
-};
-
-struct mlx5_query_cq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  cqn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_query_cq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[8];
-       struct mlx5_cq_context  ctx;
-       u8                      rsvd6[16];
-       __be64                  pas[0];
-};
-
-struct mlx5_modify_cq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  cqn;
-       __be32                  field_select;
-       struct mlx5_cq_context  ctx;
-       u8                      rsvd[192];
-       __be64                  pas[0];
-};
-
-struct mlx5_modify_cq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_enable_hca_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_enable_hca_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_disable_hca_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_disable_hca_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_eq_context {
-       u8                      status;
-       u8                      ec_oi;
-       u8                      st;
-       u8                      rsvd2[7];
-       __be16                  page_pffset;
-       __be32                  log_sz_usr_page;
-       u8                      rsvd3[7];
-       u8                      intr;
-       u8                      log_page_size;
-       u8                      rsvd4[15];
-       __be32                  consumer_counter;
-       __be32                  produser_counter;
-       u8                      rsvd5[16];
-};
-
-struct mlx5_create_eq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd0[3];
-       u8                      input_eqn;
-       u8                      rsvd1[4];
-       struct mlx5_eq_context  ctx;
-       u8                      rsvd2[8];
-       __be64                  events_mask;
-       u8                      rsvd3[176];
-       __be64                  pas[0];
-};
-
-struct mlx5_create_eq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[3];
-       u8                      eq_number;
-       u8                      rsvd1[4];
-};
-
-struct mlx5_destroy_eq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd0[3];
-       u8                      eqn;
-       u8                      rsvd1[4];
-};
-
-struct mlx5_destroy_eq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_map_eq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be64                  mask;
-       u8                      mu;
-       u8                      rsvd0[2];
-       u8                      eqn;
-       u8                      rsvd1[24];
-};
-
-struct mlx5_map_eq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_query_eq_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd0[3];
-       u8                      eqn;
-       u8                      rsvd1[4];
-};
-
-struct mlx5_query_eq_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-       struct mlx5_eq_context  ctx;
+enum {
+       MLX5_MKEY_STATUS_FREE = 1 << 6,
 };
 
 enum {
-       MLX5_MKEY_STATUS_FREE = 1 << 6,
+       MLX5_MKEY_REMOTE_INVAL  = 1 << 24,
+       MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
+       MLX5_MKEY_BSF_EN        = 1 << 30,
+       MLX5_MKEY_LEN64         = 1 << 31,
 };
 
 struct mlx5_mkey_seg {
@@ -1119,134 +824,12 @@ struct mlx5_mkey_seg {
        u8              rsvd4[4];
 };
 
-struct mlx5_query_special_ctxs_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_query_special_ctxs_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  dump_fill_mkey;
-       __be32                  reserved_lkey;
-};
-
-struct mlx5_create_mkey_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  input_mkey_index;
-       __be32                  flags;
-       struct mlx5_mkey_seg    seg;
-       u8                      rsvd1[16];
-       __be32                  xlat_oct_act_size;
-       __be32                  rsvd2;
-       u8                      rsvd3[168];
-       __be64                  pas[0];
-};
-
-struct mlx5_create_mkey_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  mkey;
-       u8                      rsvd[4];
-};
-
-struct mlx5_destroy_mkey_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  mkey;
-       u8                      rsvd[4];
-};
-
-struct mlx5_destroy_mkey_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_query_mkey_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  mkey;
-};
-
-struct mlx5_query_mkey_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be64                  pas[0];
-};
-
-struct mlx5_modify_mkey_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  mkey;
-       __be64                  pas[0];
-};
-
-struct mlx5_modify_mkey_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_dump_mkey_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-};
-
-struct mlx5_dump_mkey_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  mkey;
-};
-
-struct mlx5_mad_ifc_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be16                  remote_lid;
-       u8                      rsvd0;
-       u8                      port;
-       u8                      rsvd1[4];
-       u8                      data[256];
-};
-
-struct mlx5_mad_ifc_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-       u8                      data[256];
-};
-
-struct mlx5_access_reg_mbox_in {
-       struct mlx5_inbox_hdr           hdr;
-       u8                              rsvd0[2];
-       __be16                          register_id;
-       __be32                          arg;
-       __be32                          data[0];
-};
-
-struct mlx5_access_reg_mbox_out {
-       struct mlx5_outbox_hdr          hdr;
-       u8                              rsvd[8];
-       __be32                          data[0];
-};
-
 #define MLX5_ATTR_EXTENDED_PORT_INFO   cpu_to_be16(0xff90)
 
 enum {
        MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO        = 1 <<  0
 };
 
-struct mlx5_allocate_psv_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  npsv_pd;
-       __be32                  rsvd_psv0;
-};
-
-struct mlx5_allocate_psv_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-       __be32                  psv_idx[4];
-};
-
-struct mlx5_destroy_psv_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  psv_number;
-       u8                      rsvd[4];
-};
-
-struct mlx5_destroy_psv_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 enum {
        VPORT_STATE_DOWN                = 0x0,
        VPORT_STATE_UP                  = 0x1,
@@ -1381,6 +964,18 @@ enum mlx5_cap_type {
 #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
        MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
 
+#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \
+       MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap)
+
 #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
        MLX5_GET(flow_table_eswitch_cap, \
                 mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
index ccea6fb..85c4786 100644
 #include <linux/mlx5/doorbell.h>
 #include <linux/mlx5/srq.h>
 
-enum {
-       MLX5_RQ_BITMASK_VSD = 1 << 1,
-};
-
 enum {
        MLX5_BOARD_ID_LEN = 64,
        MLX5_MAX_NAME_LEN = 16,
@@ -481,6 +477,7 @@ struct mlx5_fc_stats {
 };
 
 struct mlx5_eswitch;
+struct mlx5_lag;
 
 struct mlx5_rl_entry {
        u32                     rate;
@@ -554,6 +551,7 @@ struct mlx5_priv {
        struct mlx5_flow_steering *steering;
        struct mlx5_eswitch     *eswitch;
        struct mlx5_core_sriov  sriov;
+       struct mlx5_lag         *lag;
        unsigned long           pci_dev_data;
        struct mlx5_fc_stats            fc_stats;
        struct mlx5_rl_table            rl_table;
@@ -771,14 +769,15 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
-int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
-int mlx5_cmd_status_to_err_v2(void *ptr);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
+
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
                  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
                     void *out, int out_size, mlx5_cmd_cbk_t callback,
                     void *context);
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -807,15 +806,18 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                      u16 lwm, int is_srq);
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+                            struct mlx5_core_mkey *mkey,
+                            u32 *in, int inlen,
+                            u32 *out, int outlen,
+                            mlx5_cmd_cbk_t callback, void *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
                          struct mlx5_core_mkey *mkey,
-                         struct mlx5_create_mkey_mbox_in *in, int inlen,
-                         mlx5_cmd_cbk_t callback, void *context,
-                         struct mlx5_create_mkey_mbox_out *out);
+                         u32 *in, int inlen);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
                           struct mlx5_core_mkey *mkey);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-                        struct mlx5_query_mkey_mbox_out *out, int outlen);
+                        u32 *out, int outlen);
 int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
                             u32 *mkey);
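
The MKey commands follow the same conversion: the mkey context is now addressed inside the raw inbox via MLX5_ADDR_OF. A hedged sketch (my_create_pa_mkey is an illustrative name; field names as defined in mlx5_ifc.h):

static int my_create_pa_mkey(struct mlx5_core_dev *dev,
			     struct mlx5_core_mkey *mkey, u32 pdn)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {0};
	void *mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, lr, 1);	/* local read */
	MLX5_SET(mkc, mkc, lw, 1);	/* local write */
	MLX5_SET(mkc, mkc, pd, pdn);
	return mlx5_core_create_mkey(dev, mkey, in, sizeof(in));
}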
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
@@ -826,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
-int mlx5_sriov_init(struct mlx5_core_dev *dev);
-int mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
                                 s32 npages);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
@@ -865,7 +865,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-                      struct mlx5_query_eq_mbox_out *out, int outlen);
+                      u32 *out, int outlen);
 int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
@@ -930,6 +930,8 @@ enum {
 struct mlx5_interface {
        void *                  (*add)(struct mlx5_core_dev *dev);
        void                    (*remove)(struct mlx5_core_dev *dev, void *context);
+       int                     (*attach)(struct mlx5_core_dev *dev, void *context);
+       void                    (*detach)(struct mlx5_core_dev *dev, void *context);
        void                    (*event)(struct mlx5_core_dev *dev, void *context,
                                         enum mlx5_dev_event event, unsigned long param);
        void *                  (*get_dev)(void *context);
@@ -942,6 +944,11 @@ int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
 int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+
 struct mlx5_profile {
        u64     mask;
        u8      log_max_qp;
index e036d60..93ebc5e 100644
@@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority,
 
 enum mlx5_flow_namespace_type {
        MLX5_FLOW_NAMESPACE_BYPASS,
+       MLX5_FLOW_NAMESPACE_LAG,
        MLX5_FLOW_NAMESPACE_OFFLOADS,
        MLX5_FLOW_NAMESPACE_ETHTOOL,
        MLX5_FLOW_NAMESPACE_KERNEL,
@@ -62,6 +63,8 @@ enum mlx5_flow_namespace_type {
        MLX5_FLOW_NAMESPACE_FDB,
        MLX5_FLOW_NAMESPACE_ESW_EGRESS,
        MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+       MLX5_FLOW_NAMESPACE_SNIFFER_RX,
+       MLX5_FLOW_NAMESPACE_SNIFFER_TX,
 };
 
 struct mlx5_flow_table;
@@ -106,6 +109,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
                             int prio,
                             int num_flow_table_entries,
                             u32 level, u16 vport);
+struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
+                                              struct mlx5_flow_namespace *ns,
+                                              int prio, u32 level);
 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft);
 
 /* inbox should be set with the following values:
index 21bc455..6045d4d 100644
@@ -152,7 +152,7 @@ enum {
        MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
        MLX5_CMD_OP_ACCESS_REG                    = 0x805,
        MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
-       MLX5_CMD_OP_DETTACH_FROM_MCG              = 0x807,
+       MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
        MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
        MLX5_CMD_OP_MAD_IFC                       = 0x50d,
        MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
@@ -174,6 +174,12 @@ enum {
        MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
        MLX5_CMD_OP_SET_WOL_ROL                   = 0x830,
        MLX5_CMD_OP_QUERY_WOL_ROL                 = 0x831,
+       MLX5_CMD_OP_CREATE_LAG                    = 0x840,
+       MLX5_CMD_OP_MODIFY_LAG                    = 0x841,
+       MLX5_CMD_OP_QUERY_LAG                     = 0x842,
+       MLX5_CMD_OP_DESTROY_LAG                   = 0x843,
+       MLX5_CMD_OP_CREATE_VPORT_LAG              = 0x844,
+       MLX5_CMD_OP_DESTROY_VPORT_LAG             = 0x845,
        MLX5_CMD_OP_CREATE_TIR                    = 0x900,
        MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
        MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
@@ -212,6 +218,8 @@ enum {
        MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
        MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
        MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
+       MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
+       MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
        MLX5_CMD_OP_MAX
 };
 
@@ -281,7 +289,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
        u8         modify_root[0x1];
        u8         identified_miss_table_mode[0x1];
        u8         flow_table_modify[0x1];
-       u8         reserved_at_7[0x19];
+       u8         encap[0x1];
+       u8         decap[0x1];
+       u8         reserved_at_9[0x17];
 
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
@@ -473,7 +483,9 @@ struct mlx5_ifc_ads_bits {
 
 struct mlx5_ifc_flow_table_nic_cap_bits {
        u8         nic_rx_multi_path_tirs[0x1];
-       u8         reserved_at_1[0x1ff];
+       u8         nic_rx_multi_path_tirs_fts[0x1];
+       u8         allow_sniffer_and_nic_rx_shared_tir[0x1];
+       u8         reserved_at_3[0x1fd];
 
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
@@ -512,7 +524,15 @@ struct mlx5_ifc_e_switch_cap_bits {
        u8         nic_vport_node_guid_modify[0x1];
        u8         nic_vport_port_guid_modify[0x1];
 
-       u8         reserved_at_20[0x7e0];
+       u8         vxlan_encap_decap[0x1];
+       u8         nvgre_encap_decap[0x1];
+       u8         reserved_at_22[0x9];
+       u8         log_max_encap_headers[0x5];
+       u8         reserved_at_2b[0x6];
+       u8         max_encap_header_size[0xa];
+
+       u8         reserved_at_40[0x7c0];
 };
 
 struct mlx5_ifc_qos_cap_bits {
@@ -767,7 +787,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         out_of_seq_cnt[0x1];
        u8         vport_counters[0x1];
        u8         retransmission_q_counters[0x1];
-       u8         reserved_at_183[0x3];
+       u8         reserved_at_183[0x1];
+       u8         modify_rq_counter_set_id[0x1];
+       u8         reserved_at_185[0x1];
        u8         max_qp_cnt[0xa];
        u8         pkey_table_size[0x10];
 
@@ -870,7 +892,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         pad_tx_eth_packet[0x1];
        u8         reserved_at_263[0x8];
        u8         log_bf_reg_size[0x5];
-       u8         reserved_at_270[0x10];
+
+       u8         reserved_at_270[0xb];
+       u8         lag_master[0x1];
+       u8         num_lag_ports[0x4];
 
        u8         reserved_at_280[0x10];
        u8         max_wqe_sz_sq[0x10];
@@ -1904,7 +1929,7 @@ enum {
 
 struct mlx5_ifc_qpc_bits {
        u8         state[0x4];
-       u8         reserved_at_4[0x4];
+       u8         lag_tx_port_affinity[0x4];
        u8         st[0x8];
        u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
@@ -1966,7 +1991,10 @@ struct mlx5_ifc_qpc_bits {
        u8         reserved_at_3e0[0x8];
        u8         cqn_snd[0x18];
 
-       u8         reserved_at_400[0x40];
+       u8         reserved_at_400[0x8];
+       u8         deth_sqpn[0x18];
+
+       u8         reserved_at_420[0x20];
 
        u8         reserved_at_440[0x8];
        u8         last_acked_psn[0x18];
@@ -2064,6 +2092,8 @@ enum {
        MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
        MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
        MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
+       MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
+       MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2083,7 +2113,9 @@ struct mlx5_ifc_flow_context_bits {
        u8         reserved_at_a0[0x8];
        u8         flow_counter_list_size[0x18];
 
-       u8         reserved_at_c0[0x140];
+       u8         encap_id[0x20];
+
+       u8         reserved_at_e0[0x120];
 
        struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -2146,7 +2178,11 @@ struct mlx5_ifc_traffic_counter_bits {
 };
 
 struct mlx5_ifc_tisc_bits {
-       u8         reserved_at_0[0xc];
+       u8         strict_lag_tx_port_affinity[0x1];
+       u8         reserved_at_1[0x3];
+       u8         lag_tx_port_affinity[0x04];
+
+       u8         reserved_at_8[0x4];
        u8         prio[0x4];
        u8         reserved_at_10[0x10];
 
@@ -2808,7 +2844,7 @@ struct mlx5_ifc_xrqc_bits {
 
        struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
 
-       u8         reserved_at_180[0x180];
+       u8         reserved_at_180[0x200];
 
        struct mlx5_ifc_wq_bits wq;
 };
@@ -3489,7 +3525,7 @@ struct mlx5_ifc_query_special_contexts_out_bits {
 
        u8         syndrome[0x20];
 
-       u8         reserved_at_40[0x20];
+       u8         dump_fill_mkey[0x20];
 
        u8         resd_lkey[0x20];
 };
@@ -4213,6 +4249,85 @@ struct mlx5_ifc_query_eq_in_bits {
        u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_encap_header_in_bits {
+       u8         reserved_at_0[0x5];
+       u8         header_type[0x3];
+       u8         reserved_at_8[0xe];
+       u8         encap_header_size[0xa];
+
+       u8         reserved_at_20[0x10];
+       u8         encap_header[2][0x8];
+
+       u8         more_encap_header[0][0x8];
+};
+
+struct mlx5_ifc_query_encap_header_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0xa0];
+
+       struct mlx5_ifc_encap_header_in_bits encap_header[0];
+};
+
+struct mlx5_ifc_query_encap_header_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         encap_id[0x20];
+
+       u8         reserved_at_60[0xa0];
+};
+
+struct mlx5_ifc_alloc_encap_header_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         encap_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_encap_header_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0xa0];
+
+       struct mlx5_ifc_encap_header_in_bits encap_header;
+};
+
+struct mlx5_ifc_dealloc_encap_header_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_encap_header_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         encap_id[0x20];
+
+       u8         reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_query_dct_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -4517,7 +4632,9 @@ struct mlx5_ifc_modify_tis_out_bits {
 struct mlx5_ifc_modify_tis_bitmask_bits {
        u8         reserved_at_0[0x20];
 
-       u8         reserved_at_20[0x1f];
+       u8         reserved_at_20[0x1d];
+       u8         lag_tx_port_affinity[0x1];
+       u8         strict_lag_tx_port_affinity[0x1];
        u8         prio[0x1];
 };
 
@@ -4652,6 +4769,11 @@ struct mlx5_ifc_modify_rq_out_bits {
        u8         reserved_at_40[0x40];
 };
 
+enum {
+       MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
+       MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+};
+
 struct mlx5_ifc_modify_rq_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
@@ -4721,7 +4843,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits {
        u8         reserved_at_0[0x16];
        u8         node_guid[0x1];
        u8         port_guid[0x1];
-       u8         reserved_at_18[0x1];
+       u8         min_inline[0x1];
        u8         mtu[0x1];
        u8         change_event[0x1];
        u8         promisc[0x1];
@@ -6099,7 +6221,9 @@ struct mlx5_ifc_create_flow_table_in_bits {
 
        u8         reserved_at_a0[0x20];
 
-       u8         reserved_at_c0[0x4];
+       u8         encap_en[0x1];
+       u8         decap_en[0x1];
+       u8         reserved_at_c2[0x2];
        u8         table_miss_mode[0x4];
        u8         level[0x8];
        u8         reserved_at_d0[0x8];
@@ -6108,7 +6232,10 @@ struct mlx5_ifc_create_flow_table_in_bits {
        u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
 
-       u8         reserved_at_100[0x100];
+       u8         reserved_at_100[0x8];
+       u8         lag_master_next_table_id[0x18];
+
+       u8         reserved_at_120[0x80];
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
@@ -6710,9 +6837,10 @@ struct mlx5_ifc_pude_reg_bits {
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-       u8         an_disable_cap[0x1];
+       u8         reserved_at_0[0x1];
        u8         an_disable_admin[0x1];
-       u8         reserved_at_2[0x6];
+       u8         an_disable_cap[0x1];
+       u8         reserved_at_3[0x5];
        u8         local_port[0x8];
        u8         reserved_at_10[0xd];
        u8         proto_mask[0x3];
@@ -7562,7 +7690,8 @@ struct mlx5_ifc_set_flow_table_root_in_bits {
 };
 
 enum {
-       MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
+       MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID     = (1UL << 0),
+       MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15),
 };
 
 struct mlx5_ifc_modify_flow_table_out_bits {
@@ -7601,7 +7730,10 @@ struct mlx5_ifc_modify_flow_table_in_bits {
        u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
 
-       u8         reserved_at_100[0x100];
+       u8         reserved_at_100[0x8];
+       u8         lag_master_next_table_id[0x18];
+
+       u8         reserved_at_120[0x80];
 };
 
 struct mlx5_ifc_ets_tcn_config_reg_bits {
@@ -7709,4 +7841,134 @@ struct mlx5_ifc_dcbx_param_bits {
        u8         error[0x8];
        u8         reserved_at_a0[0x160];
 };
+
+struct mlx5_ifc_lagc_bits {
+       u8         reserved_at_0[0x1d];
+       u8         lag_state[0x3];
+
+       u8         reserved_at_20[0x14];
+       u8         tx_remap_affinity_2[0x4];
+       u8         reserved_at_38[0x4];
+       u8         tx_remap_affinity_1[0x4];
+};
+
+struct mlx5_ifc_create_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_modify_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x20];
+       u8         field_select[0x20];
+
+       struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_query_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+
+       struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_query_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_vport_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_vport_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_vport_lag_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_vport_lag_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+};
+
 #endif /* MLX5_IFC_H */
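
These LAG mailboxes are driven through the same raw-command path. A sketch of creating a LAG with both ports mapped (my_create_lag and the affinity values are illustrative; the pattern mirrors how a caller would use mlx5_cmd_exec with these layouts):

static int my_create_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	MLX5_SET(create_lag_in, in, ctx.tx_remap_affinity_1, 1);
	MLX5_SET(create_lag_in, in, ctx.tx_remap_affinity_2, 2);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}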
index e3012cc..b3065ac 100644
@@ -61,6 +61,39 @@ enum mlx5_an_status {
 #define MLX5_I2C_ADDR_HIGH             0x51
 #define MLX5_EEPROM_PAGE_LENGTH                256
 
+enum mlx5e_link_mode {
+       MLX5E_1000BASE_CX_SGMII  = 0,
+       MLX5E_1000BASE_KX        = 1,
+       MLX5E_10GBASE_CX4        = 2,
+       MLX5E_10GBASE_KX4        = 3,
+       MLX5E_10GBASE_KR         = 4,
+       MLX5E_20GBASE_KR2        = 5,
+       MLX5E_40GBASE_CR4        = 6,
+       MLX5E_40GBASE_KR4        = 7,
+       MLX5E_56GBASE_R4         = 8,
+       MLX5E_10GBASE_CR         = 12,
+       MLX5E_10GBASE_SR         = 13,
+       MLX5E_10GBASE_ER         = 14,
+       MLX5E_40GBASE_SR4        = 15,
+       MLX5E_40GBASE_LR4        = 16,
+       MLX5E_50GBASE_SR2        = 18,
+       MLX5E_100GBASE_CR4       = 20,
+       MLX5E_100GBASE_SR4       = 21,
+       MLX5E_100GBASE_KR4       = 22,
+       MLX5E_100GBASE_LR4       = 23,
+       MLX5E_100BASE_TX         = 24,
+       MLX5E_1000BASE_T         = 25,
+       MLX5E_10GBASE_T          = 26,
+       MLX5E_25GBASE_CR         = 27,
+       MLX5E_25GBASE_KR         = 28,
+       MLX5E_25GBASE_SR         = 29,
+       MLX5E_50GBASE_CR2        = 30,
+       MLX5E_50GBASE_KR2        = 31,
+       MLX5E_LINK_MODES_NUMBER,
+};
+
+#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
+
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
@@ -70,9 +103,10 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
                                u32 *proto_admin, int proto_mask);
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
                                    u8 *link_width_oper, u8 local_port);
-int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
-                              u8 *proto_oper, int proto_mask,
-                              u8 local_port);
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+                                 u8 *proto_oper, u8 local_port);
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+                                  u32 *proto_oper, u8 local_port);
 int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
                       u32 proto_admin, int proto_mask);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
index 7879bf4..0aacb2a 100644
@@ -123,12 +123,13 @@ enum {
 };
 
 enum {
-       MLX5_NON_ZERO_RQ        = 0 << 24,
-       MLX5_SRQ_RQ             = 1 << 24,
-       MLX5_CRQ_RQ             = 2 << 24,
-       MLX5_ZERO_LEN_RQ        = 3 << 24
+       MLX5_NON_ZERO_RQ        = 0x0,
+       MLX5_SRQ_RQ             = 0x1,
+       MLX5_CRQ_RQ             = 0x2,
+       MLX5_ZERO_LEN_RQ        = 0x3
 };
 
+/* TODO REM */
 enum {
        /* params1 */
        MLX5_QP_BIT_SRE                         = 1 << 15,
@@ -177,12 +178,6 @@ enum {
        MLX5_FENCE_MODE_SMALL_AND_FENCE         = 4 << 5,
 };
 
-enum {
-       MLX5_QP_LAT_SENSITIVE   = 1 << 28,
-       MLX5_QP_BLOCK_MCAST     = 1 << 30,
-       MLX5_QP_ENABLE_SIG      = 1 << 31,
-};
-
 enum {
        MLX5_RCV_DBR    = 0,
        MLX5_SND_DBR    = 1,
@@ -484,6 +479,7 @@ struct mlx5_qp_path {
        u8                      rmac[6];
 };
 
+/* FIXME: use mlx5_ifc.h qpc */
 struct mlx5_qp_context {
        __be32                  flags;
        __be32                  flags_pd;
@@ -525,99 +521,6 @@ struct mlx5_qp_context {
        u8                      rsvd1[24];
 };
 
-struct mlx5_create_qp_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  input_qpn;
-       u8                      rsvd0[4];
-       __be32                  opt_param_mask;
-       u8                      rsvd1[4];
-       struct mlx5_qp_context  ctx;
-       u8                      rsvd3[16];
-       __be64                  pas[0];
-};
-
-struct mlx5_create_qp_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  qpn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_destroy_qp_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       u8                      rsvd0[4];
-};
-
-struct mlx5_destroy_qp_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[8];
-};
-
-struct mlx5_modify_qp_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       u8                      rsvd0[4];
-       __be32                  optparam;
-       u8                      rsvd1[4];
-       struct mlx5_qp_context  ctx;
-       u8                      rsvd2[16];
-};
-
-struct mlx5_modify_qp_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd0[8];
-};
-
-struct mlx5_query_qp_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_query_qp_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd1[8];
-       __be32                  optparam;
-       u8                      rsvd0[4];
-       struct mlx5_qp_context  ctx;
-       u8                      rsvd2[16];
-       __be64                  pas[0];
-};
-
-struct mlx5_conf_sqp_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  qpn;
-       u8                      rsvd[3];
-       u8                      type;
-};
-
-struct mlx5_conf_sqp_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_alloc_xrcd_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       u8                      rsvd[8];
-};
-
-struct mlx5_alloc_xrcd_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       __be32                  xrcdn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_dealloc_xrcd_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  xrcdn;
-       u8                      rsvd[4];
-};
-
-struct mlx5_dealloc_xrcd_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn)
 {
        return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
@@ -628,28 +531,17 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev,
        return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
 }
 
-struct mlx5_page_fault_resume_mbox_in {
-       struct mlx5_inbox_hdr   hdr;
-       __be32                  flags_qpn;
-       u8                      reserved[4];
-};
-
-struct mlx5_page_fault_resume_mbox_out {
-       struct mlx5_outbox_hdr  hdr;
-       u8                      rsvd[8];
-};
-
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
                        struct mlx5_core_qp *qp,
-                       struct mlx5_create_qp_mbox_in *in,
+                       u32 *in,
                        int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
-                       struct mlx5_modify_qp_mbox_in *in, int sqd_event,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+                       u32 opt_param_mask, void *qpc,
                        struct mlx5_core_qp *qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
                         struct mlx5_core_qp *qp);
 int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-                      struct mlx5_query_qp_mbox_out *out, int outlen);
+                      u32 *out, int outlen);
 
 int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);
 int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);
index e087b7d..451b0bd 100644
@@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
                                     u8 *min_inline);
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+                                    u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
                                      u16 vport, u8 *addr);
 int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
index 08ed53e..ef815b9 100644
@@ -2014,6 +2014,7 @@ extern void mm_drop_all_locks(struct mm_struct *mm);
 
 extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
+extern struct file *get_task_exe_file(struct task_struct *task);
 
 extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
 extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
index d572b78..7f2ae99 100644
@@ -828,9 +828,21 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
  */
 #define zone_idx(zone)         ((zone) - (zone)->zone_pgdat->node_zones)
 
-static inline int populated_zone(struct zone *zone)
+/*
+ * Returns true if a zone has pages managed by the buddy allocator.
+ * All reclaim decisions must use this function rather than
+ * populated_zone(). If the whole zone is reserved then we can easily
+ * end up with populated_zone() && !managed_zone().
+ */
+static inline bool managed_zone(struct zone *zone)
+{
+       return zone->managed_pages;
+}
+
+/* Returns true if a zone has memory */
+static inline bool populated_zone(struct zone *zone)
 {
-       return (!!zone->present_pages);
+       return zone->present_pages;
 }
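
A sketch of the intended call pattern (my_scan_node is an illustrative name): reclaim-style walks skip zones with no buddy-managed pages even where populated_zone() would still return true:

static void my_scan_node(pg_data_t *pgdat)
{
	struct zone *zone;

	for (zone = pgdat->node_zones;
	     zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
		if (!managed_zone(zone))
			continue;	/* fully reserved: nothing to reclaim */
		/* ... consider this zone for reclaim ... */
	}
}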
 
 extern int movable_zone;
index b9f0ff4..cd0c8bd 100644
@@ -25,6 +25,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/rcupdate.h>
 #include <linux/once.h>
+#include <linux/fs.h>
 
 #include <uapi/linux/net.h>
 
@@ -128,6 +129,9 @@ struct page;
 struct sockaddr;
 struct msghdr;
 struct module;
+struct sk_buff;
+typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
+                              unsigned int, size_t);
 
 struct proto_ops {
        int             family;
@@ -186,6 +190,8 @@ struct proto_ops {
                                       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
        int             (*set_peek_off)(struct sock *sk, int val);
        int             (*peek_len)(struct socket *sock);
+       int             (*read_sock)(struct sock *sk, read_descriptor_t *desc,
+                                    sk_read_actor_t recv_actor);
 };
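
A hedged sketch of the new hook in use, in the style of tcp_read_sock(): the caller supplies an actor that consumes queued skbs in place (my_recv_actor and my_drain are illustrative names, not from this patch):

static int my_recv_actor(read_descriptor_t *desc, struct sk_buff *skb,
			 unsigned int offset, size_t len)
{
	/* consume up to @len bytes of @skb starting at @offset */
	return len;
}

static void my_drain(struct socket *sock, struct sock *sk)
{
	read_descriptor_t desc = { .count = 1 };

	if (sock->ops->read_sock)
		sock->ops->read_sock(sk, &desc, my_recv_actor);
}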
 
 #define DECLARE_SOCKADDR(type, dst, src)       \
index 794bb07..2095b6a 100644
@@ -1031,7 +1031,7 @@ struct netdev_xdp {
  *     Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
  *                    struct net_device *dev, struct net_device *filter_dev,
- *                    int idx)
+ *                    int *idx)
  *     Used to add FDB entries to dump requests. Implementers should add
  *     entries to skb and update idx with the number of entries.
  *
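
A sketch of an implementation against the new int *idx signature (my_fdb_dump is an illustrative name; the converted drivers in this series also keep resume state in cb->args):

static int my_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
		       struct net_device *dev,
		       struct net_device *filter_dev, int *idx)
{
	/* for each FDB entry owned by @dev:
	 *   - skip entries already dumped in a previous pass,
	 *   - append the entry to @skb,
	 *   - then (*idx)++ so the core can resume after this one.
	 */
	return 0;
}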
@@ -1263,7 +1263,7 @@ struct net_device_ops {
                                                struct netlink_callback *cb,
                                                struct net_device *dev,
                                                struct net_device *filter_dev,
-                                               int idx);
+                                               int *idx);
 
        int                     (*ndo_bridge_setlink)(struct net_device *dev,
                                                      struct nlmsghdr *nlh,
@@ -1562,8 +1562,6 @@ enum netdev_priv_flags {
  *
  *     @xps_maps:      XXX: need comments on this one
  *
- *     @offload_fwd_mark:      Offload device fwding mark
- *
  *     @watchdog_timeo:        Represents the timeout that is used by
  *                             the watchdog (see dev_watchdog())
  *     @watchdog_timer:        List of timers
@@ -1814,9 +1812,6 @@ struct net_device {
 #ifdef CONFIG_NET_CLS_ACT
        struct tcf_proto __rcu  *egress_cl_list;
 #endif
-#ifdef CONFIG_NET_SWITCHDEV
-       u32                     offload_fwd_mark;
-#endif
 
        /* These may be needed for future network-power-down code. */
        struct timer_list       watchdog_timer;
@@ -3271,6 +3266,7 @@ static inline void napi_free_frags(struct napi_struct *napi)
        napi->skb = NULL;
 }
 
+bool netdev_is_rx_handler_busy(struct net_device *dev);
 int netdev_rx_handler_register(struct net_device *dev,
                               rx_handler_func_t *rx_handler,
                               void *rx_handler_data);
index 80ca889..664da00 100644
@@ -15,6 +15,6 @@ struct nf_acct;
 struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-extern int nfnl_acct_overquota(const struct sk_buff *skb,
-                             struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+                       struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
index d8b37ba..7676557 100644
@@ -794,7 +794,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-       uuid_le         hostid;
+       uuid_be         hostid;
        __le16          cntlid;
        char            resv4[238];
        char            subsysnqn[NVMF_NQN_FIELD_LEN];
index 2599a98..0ab8359 100644
@@ -682,15 +682,6 @@ struct pci_driver {
 
 #define        to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
 
-/**
- * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table
- * @_table: device table name
- *
- * This macro is deprecated and should not be used in new code.
- */
-#define DEFINE_PCI_DEVICE_TABLE(_table) \
-       const struct pci_device_id _table[]
-
 /**
  * PCI_DEVICE - macro used to describe a specific pci device
  * @vend: the 16 bit PCI Vendor ID
@@ -1251,10 +1242,12 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
                      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_NOLEGACY       (1 << 0) /* don't use legacy interrupts */
-#define PCI_IRQ_NOMSI          (1 << 1) /* don't use MSI interrupts */
-#define PCI_IRQ_NOMSIX         (1 << 2) /* don't use MSI-X interrupts */
-#define PCI_IRQ_NOAFFINITY     (1 << 3) /* don't auto-assign affinity */
+#define PCI_IRQ_LEGACY         (1 << 0) /* allow legacy interrupts */
+#define PCI_IRQ_MSI            (1 << 1) /* allow MSI interrupts */
+#define PCI_IRQ_MSIX           (1 << 2) /* allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY       (1 << 3) /* auto-assign affinity */
+#define PCI_IRQ_ALL_TYPES \
+       (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
index 2b6b43c..ccb73a5 100644 (file)
@@ -679,6 +679,10 @@ struct perf_event {
        u64                             (*clock)(void);
        perf_overflow_handler_t         overflow_handler;
        void                            *overflow_handler_context;
+#ifdef CONFIG_BPF_SYSCALL
+       perf_overflow_handler_t         orig_overflow_handler;
+       struct bpf_prog                 *prog;
+#endif
 
 #ifdef CONFIG_EVENT_TRACING
        struct trace_event_call         *tp_event;
@@ -788,6 +792,11 @@ struct perf_output_handle {
        int                             page;
 };
 
+struct bpf_perf_event_data_kern {
+       struct pt_regs *regs;
+       struct perf_sample_data *data;
+};
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
index 40c0ada..1902763 100644 (file)
@@ -5,28 +5,77 @@
  * (GPL) Version 2, available from the file COPYING in the main directory of
  * this source tree.
  */
+#ifndef _COMMON_HSI_H
+#define _COMMON_HSI_H
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+
+/* dma_addr_t manip */
+#define DMA_LO_LE(x)           cpu_to_le32(lower_32_bits(x))
+#define DMA_HI_LE(x)           cpu_to_le32(upper_32_bits(x))
+#define DMA_REGPAIR_LE(x, val) do { \
+                                       (x).hi = DMA_HI_LE((val)); \
+                                       (x).lo = DMA_LO_LE((val)); \
+                               } while (0)
+
+#define HILO_GEN(hi, lo, type)  ((((type)(hi)) << 32) + (lo))
+#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64)
+#define HILO_64_REGPAIR(regpair)        (HILO_64(regpair.hi, regpair.lo))
+#define HILO_DMA_REGPAIR(regpair)      ((dma_addr_t)HILO_64_REGPAIR(regpair))
 
 #ifndef __COMMON_HSI__
 #define __COMMON_HSI__
 
-#define CORE_SPQE_PAGE_SIZE_BYTES                       4096
 
 #define X_FINAL_CLEANUP_AGG_INT 1
+
+#define EVENT_RING_PAGE_SIZE_BYTES          4096
+
 #define NUM_OF_GLOBAL_QUEUES                            128
+#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE        64
+
+#define ISCSI_CDU_TASK_SEG_TYPE       0
+#define RDMA_CDU_TASK_SEG_TYPE        1
+
+#define FW_ASSERT_GENERAL_ATTN_IDX    32
+
+#define MAX_PINNED_CCFC                 32
 
 /* Queue Zone sizes in bytes */
 #define TSTORM_QZONE_SIZE 8
-#define MSTORM_QZONE_SIZE 0
+#define MSTORM_QZONE_SIZE 16
 #define USTORM_QZONE_SIZE 8
 #define XSTORM_QZONE_SIZE 8
 #define YSTORM_QZONE_SIZE 0
 #define PSTORM_QZONE_SIZE 0
 
-#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16
+#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG        7
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT   16
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE    48
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD      112
+
+/********************************/
+/* CORE (LIGHT L2) FW CONSTANTS */
+/********************************/
+
+#define CORE_LL2_MAX_RAMROD_PER_CON    8
+#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES 4096
+#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES 4096
+#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES        4096
+#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS  1
+
+#define CORE_LL2_TX_MAX_BDS_PER_PACKET 12
+
+#define CORE_SPQE_PAGE_SIZE_BYTES      4096
+
+#define MAX_NUM_LL2_RX_QUEUES          32
+#define MAX_NUM_LL2_TX_STATS_COUNTERS  32
 
 #define FW_MAJOR_VERSION       8
 #define FW_MINOR_VERSION       10
-#define FW_REVISION_VERSION    5
+#define FW_REVISION_VERSION    10
 #define FW_ENGINEERING_VERSION 0
 
 /***********************/
 #define NUM_OF_LCIDS           (320)
 #define NUM_OF_LTIDS           (320)
 
+/* Clock values */
+#define MASTER_CLK_FREQ_E4     (375e6)
+#define STORM_CLK_FREQ_E4      (1000e6)
+#define CLK25M_CLK_FREQ_E4     (25e6)
+
+/* Global PXP windows (GTT) */
+#define NUM_OF_GTT             19
+#define GTT_DWORD_SIZE_BITS    10
+#define GTT_BYTE_SIZE_BITS     (GTT_DWORD_SIZE_BITS + 2)
+#define GTT_DWORD_SIZE         BIT(GTT_DWORD_SIZE_BITS)
+
+/* Tools Version */
+#define TOOLS_VERSION 10
+
 /*****************/
 /* CDU CONSTANTS */
 /*****************/
 #define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT              (17)
 #define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK             (0x1ffff)
 
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT       (12)
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK      (0xfff)
 /*****************/
 /* DQ CONSTANTS  */
 /*****************/
 #define        DQ_XCM_ETH_TX_BD_CONS_CMD       DQ_XCM_AGG_VAL_SEL_WORD3
 #define        DQ_XCM_ETH_TX_BD_PROD_CMD       DQ_XCM_AGG_VAL_SEL_WORD4
 #define        DQ_XCM_ETH_GO_TO_BD_CONS_CMD    DQ_XCM_AGG_VAL_SEL_WORD5
+#define DQ_XCM_ISCSI_SQ_CONS_CMD       DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_ISCSI_SQ_PROD_CMD       DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3
+#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD   DQ_XCM_AGG_VAL_SEL_REG6
+#define DQ_XCM_ROCE_SQ_PROD_CMD        DQ_XCM_AGG_VAL_SEL_WORD4
 
 /* UCM agg val selection (HW) */
 #define        DQ_UCM_AGG_VAL_SEL_WORD0        0
 #define        DQ_XCM_AGG_FLG_SHIFT_CF23       7
 
 /* XCM agg counter flag selection */
-#define DQ_XCM_CORE_DQ_CF_CMD          (1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
-#define DQ_XCM_CORE_TERMINATE_CMD      (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
-#define DQ_XCM_CORE_SLOW_PATH_CMD      (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
-#define DQ_XCM_ETH_DQ_CF_CMD           (1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
-#define DQ_XCM_ETH_TERMINATE_CMD       (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
-#define DQ_XCM_ETH_SLOW_PATH_CMD       (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
-#define DQ_XCM_ETH_TPH_EN_CMD          (1 << DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_CORE_DQ_CF_CMD          BIT(DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_CORE_TERMINATE_CMD      BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_CORE_SLOW_PATH_CMD      BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_DQ_CF_CMD           BIT(DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_ETH_TERMINATE_CMD       BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ETH_SLOW_PATH_CMD       BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_TPH_EN_CMD          BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_ISCSI_DQ_FLUSH_CMD      BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ISCSI_SLOW_PATH_CMD     BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
 
 /* UCM agg counter flag selection (HW) */
 #define        DQ_UCM_AGG_FLG_SHIFT_CF0        0
 #define        DQ_UCM_AGG_FLG_SHIFT_RULE1EN    7
 
 /* UCM agg counter flag selection (FW) */
-#define DQ_UCM_ETH_PMD_TX_ARM_CMD      (1 << DQ_UCM_AGG_FLG_SHIFT_CF4)
-#define DQ_UCM_ETH_PMD_RX_ARM_CMD      (1 << DQ_UCM_AGG_FLG_SHIFT_CF5)
-
+#define DQ_UCM_ETH_PMD_TX_ARM_CMD      BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ETH_PMD_RX_ARM_CMD      BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD   BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ROCE_CQ_ARM_CF_CMD      BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
+
+/* TCM agg counter flag selection (HW) */
+#define DQ_TCM_AGG_FLG_SHIFT_CF0       0
+#define DQ_TCM_AGG_FLG_SHIFT_CF1       1
+#define DQ_TCM_AGG_FLG_SHIFT_CF2       2
+#define DQ_TCM_AGG_FLG_SHIFT_CF3       3
+#define DQ_TCM_AGG_FLG_SHIFT_CF4       4
+#define DQ_TCM_AGG_FLG_SHIFT_CF5       5
+#define DQ_TCM_AGG_FLG_SHIFT_CF6       6
+#define DQ_TCM_AGG_FLG_SHIFT_CF7       7
+/* TCM agg counter flag selection (FW) */
+#define DQ_TCM_ISCSI_FLUSH_Q0_CMD      BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD        BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
+
+/* PWM address mapping */
+#define DQ_PWM_OFFSET_DPM_BASE 0x0
+#define DQ_PWM_OFFSET_DPM_END  0x27
+#define DQ_PWM_OFFSET_XCM16_BASE       0x40
+#define DQ_PWM_OFFSET_XCM32_BASE       0x44
+#define DQ_PWM_OFFSET_UCM16_BASE       0x48
+#define DQ_PWM_OFFSET_UCM32_BASE       0x4C
+#define DQ_PWM_OFFSET_UCM16_4  0x50
+#define DQ_PWM_OFFSET_TCM16_BASE       0x58
+#define DQ_PWM_OFFSET_TCM32_BASE       0x5C
+#define DQ_PWM_OFFSET_XCM_FLAGS        0x68
+#define DQ_PWM_OFFSET_UCM_FLAGS        0x69
+#define DQ_PWM_OFFSET_TCM_FLAGS        0x6B
+
+#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD         (DQ_PWM_OFFSET_XCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT   (DQ_PWM_OFFSET_UCM32_BASE)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT   (DQ_PWM_OFFSET_UCM16_4)
+#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT     (DQ_PWM_OFFSET_UCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS       (DQ_PWM_OFFSET_UCM_FLAGS)
+#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD         (DQ_PWM_OFFSET_TCM16_BASE + 1)
+#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD                (DQ_PWM_OFFSET_TCM16_BASE + 3)
 #define        DQ_REGION_SHIFT (12)
 
 /* DPM */
  */
 #define CM_TX_PQ_BASE  0x200
 
+/* number of global Vport/QCN rate limiters */
+#define MAX_QM_GLOBAL_RLS      256
 /* QM registers data */
 #define QM_LINE_CRD_REG_WIDTH          16
-#define QM_LINE_CRD_REG_SIGN_BIT       (1 << (QM_LINE_CRD_REG_WIDTH - 1))
+#define QM_LINE_CRD_REG_SIGN_BIT       BIT((QM_LINE_CRD_REG_WIDTH - 1))
 #define QM_BYTE_CRD_REG_WIDTH          24
-#define QM_BYTE_CRD_REG_SIGN_BIT       (1 << (QM_BYTE_CRD_REG_WIDTH - 1))
+#define QM_BYTE_CRD_REG_SIGN_BIT       BIT((QM_BYTE_CRD_REG_WIDTH - 1))
 #define QM_WFQ_CRD_REG_WIDTH           32
-#define QM_WFQ_CRD_REG_SIGN_BIT                (1 << (QM_WFQ_CRD_REG_WIDTH - 1))
+#define QM_WFQ_CRD_REG_SIGN_BIT                BIT((QM_WFQ_CRD_REG_WIDTH - 1))
 #define QM_RL_CRD_REG_WIDTH            32
-#define QM_RL_CRD_REG_SIGN_BIT         (1 << (QM_RL_CRD_REG_WIDTH - 1))
+#define QM_RL_CRD_REG_SIGN_BIT         BIT((QM_RL_CRD_REG_WIDTH - 1))
 
 /*****************/
 /* CAU CONSTANTS */
 /* PXP CONSTANTS */
 /*****************/
 
+/* Bars for Blocks */
+#define PXP_BAR_GRC    0
+#define PXP_BAR_TSDM   0
+#define PXP_BAR_USDM   0
+#define PXP_BAR_XSDM   0
+#define PXP_BAR_MSDM   0
+#define PXP_BAR_YSDM   0
+#define PXP_BAR_PSDM   0
+#define PXP_BAR_IGU    0
+#define PXP_BAR_DQ     1
+
 /* PTT and GTT */
 #define PXP_NUM_PF_WINDOWS             12
 #define PXP_PER_PF_ENTRY_SIZE          8
        (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \
         PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1)
 
+/* PF BAR */
+#define PXP_BAR0_START_GRC     0x0000
+#define PXP_BAR0_GRC_LENGTH    0x1C00000
+#define PXP_BAR0_END_GRC       (PXP_BAR0_START_GRC + \
+                                PXP_BAR0_GRC_LENGTH - 1)
+
+#define PXP_BAR0_START_IGU     0x1C00000
+#define PXP_BAR0_IGU_LENGTH    0x10000
+#define PXP_BAR0_END_IGU       (PXP_BAR0_START_IGU + \
+                                PXP_BAR0_IGU_LENGTH - 1)
+
+#define PXP_BAR0_START_TSDM    0x1C80000
+#define PXP_BAR0_SDM_LENGTH    0x40000
+#define PXP_BAR0_SDM_RESERVED_LENGTH   0x40000
+#define PXP_BAR0_END_TSDM      (PXP_BAR0_START_TSDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_MSDM    0x1D00000
+#define PXP_BAR0_END_MSDM      (PXP_BAR0_START_MSDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_USDM    0x1D80000
+#define PXP_BAR0_END_USDM      (PXP_BAR0_START_USDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_XSDM    0x1E00000
+#define PXP_BAR0_END_XSDM      (PXP_BAR0_START_XSDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_YSDM    0x1E80000
+#define PXP_BAR0_END_YSDM      (PXP_BAR0_START_YSDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_PSDM    0x1F00000
+#define PXP_BAR0_END_PSDM      (PXP_BAR0_START_PSDM + \
+                                PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_FIRST_INVALID_ADDRESS (PXP_BAR0_END_PSDM + 1)
+
+/* VF BAR */
+#define PXP_VF_BAR0    0
+
+#define PXP_VF_BAR0_START_GRC  0x3E00
+#define PXP_VF_BAR0_GRC_LENGTH 0x200
+#define PXP_VF_BAR0_END_GRC    (PXP_VF_BAR0_START_GRC + \
+                                PXP_VF_BAR0_GRC_LENGTH - 1)
 
 #define PXP_VF_BAR0_START_IGU                   0
 #define PXP_VF_BAR0_IGU_LENGTH                  0x3000
 #define PXP_NUM_ILT_RECORDS_BB 7600
 #define PXP_NUM_ILT_RECORDS_K2 11000
 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2)
+#define PXP_QUEUES_ZONE_MAX_NUM 320
+/*****************/
+/* PRM CONSTANTS */
+/*****************/
+#define PRM_DMA_PAD_BYTES_NUM  2
+/******************/
+/* SDMs CONSTANTS */
+/******************/
+#define SDM_OP_GEN_TRIG_NONE   0
+#define SDM_OP_GEN_TRIG_WAKE_THREAD    1
+#define SDM_OP_GEN_TRIG_AGG_INT        2
+#define SDM_OP_GEN_TRIG_LOADER 4
+#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6
+#define SDM_OP_GEN_TRIG_RELEASE_THREAD 7
 
 #define SDM_COMP_TYPE_NONE              0
 #define SDM_COMP_TYPE_WAKE_THREAD       1
 /* PRS CONSTANTS */
 /*****************/
 
+#define PRS_GFT_CAM_LINES_NO_MATCH     31
+
 /* Async data KCQ CQE */
 struct async_data {
        __le32  cid;
@@ -440,20 +624,6 @@ struct coalescing_timeset {
 #define        COALESCING_TIMESET_VALID_SHIFT          7
 };
 
-struct common_prs_pf_msg_info {
-       __le32 value;
-#define        COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK     0x1
-#define        COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT    0
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK          0x1
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT         1
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK          0x1
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT         2
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK          0x1
-#define        COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT         3
-#define        COMMON_PRS_PF_MSG_INFO_RESERVED_MASK            0xFFFFFFF
-#define        COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT           4
-};
-
 struct common_queue_zone {
        __le16 ring_drv_data_consumer;
        __le16 reserved;
@@ -473,6 +643,19 @@ struct vf_pf_channel_eqe_data {
        struct regpair msg_addr;
 };
 
+struct iscsi_eqe_data {
+       __le32 cid;
+       __le16 conn_id;
+       u8 error_code;
+       u8 error_pdu_opcode_reserved;
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK           0x3F
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT          0
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK     0x1
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT     6
+#define ISCSI_EQE_DATA_RESERVED0_MASK                  0x1
+#define ISCSI_EQE_DATA_RESERVED0_SHIFT                 7
+};
+
 struct malicious_vf_eqe_data {
        u8 vf_id;
        u8 err_id;
@@ -488,6 +671,7 @@ struct initial_cleanup_eqe_data {
 union event_ring_data {
        u8 bytes[8];
        struct vf_pf_channel_eqe_data vf_pf_channel;
+       struct iscsi_eqe_data iscsi_info;
        struct malicious_vf_eqe_data malicious_vf;
        struct initial_cleanup_eqe_data vf_init_cleanup;
 };
@@ -616,6 +800,52 @@ enum db_dest {
        MAX_DB_DEST
 };
 
+/* Enum of doorbell DPM types */
+enum db_dpm_type {
+       DPM_LEGACY,
+       DPM_ROCE,
+       DPM_L2_INLINE,
+       DPM_L2_BD,
+       MAX_DB_DPM_TYPE
+};
+
+/* Structure for doorbell data, in L2 DPM mode, for 1st db in a DPM burst */
+struct db_l2_dpm_data {
+       __le16 icid;
+       __le16 bd_prod;
+       __le32 params;
+#define DB_L2_DPM_DATA_SIZE_MASK       0x3F
+#define DB_L2_DPM_DATA_SIZE_SHIFT      0
+#define DB_L2_DPM_DATA_DPM_TYPE_MASK   0x3
+#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT  6
+#define DB_L2_DPM_DATA_NUM_BDS_MASK    0xFF
+#define DB_L2_DPM_DATA_NUM_BDS_SHIFT   8
+#define DB_L2_DPM_DATA_PKT_SIZE_MASK   0x7FF
+#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT  16
+#define DB_L2_DPM_DATA_RESERVED0_MASK  0x1
+#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27
+#define DB_L2_DPM_DATA_SGE_NUM_MASK    0x7
+#define DB_L2_DPM_DATA_SGE_NUM_SHIFT   28
+#define DB_L2_DPM_DATA_RESERVED1_MASK  0x1
+#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31
+};
+
+/* Structure for SGE in a DPM doorbell of type DPM_L2_BD */
+struct db_l2_dpm_sge {
+       struct regpair addr;
+       __le16 nbytes;
+       __le16 bitfields;
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK        0x1FF
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0
+#define DB_L2_DPM_SGE_RESERVED0_MASK   0x3
+#define DB_L2_DPM_SGE_RESERVED0_SHIFT  9
+#define DB_L2_DPM_SGE_ST_VALID_MASK    0x1
+#define DB_L2_DPM_SGE_ST_VALID_SHIFT   11
+#define DB_L2_DPM_SGE_RESERVED1_MASK   0xF
+#define DB_L2_DPM_SGE_RESERVED1_SHIFT  12
+       __le32 reserved2;
+};
+
 /* Structure for doorbell address, in legacy mode */
 struct db_legacy_addr {
        __le32 addr;
@@ -627,6 +857,49 @@ struct db_legacy_addr {
 #define DB_LEGACY_ADDR_ICID_SHIFT      5
 };
 
+/* Structure for doorbell address, in PWM mode */
+struct db_pwm_addr {
+       __le32 addr;
+#define DB_PWM_ADDR_RESERVED0_MASK     0x7
+#define DB_PWM_ADDR_RESERVED0_SHIFT 0
+#define DB_PWM_ADDR_OFFSET_MASK        0x7F
+#define DB_PWM_ADDR_OFFSET_SHIFT       3
+#define DB_PWM_ADDR_WID_MASK   0x3
+#define DB_PWM_ADDR_WID_SHIFT  10
+#define DB_PWM_ADDR_DPI_MASK   0xFFFF
+#define DB_PWM_ADDR_DPI_SHIFT  12
+#define DB_PWM_ADDR_RESERVED1_MASK     0xF
+#define DB_PWM_ADDR_RESERVED1_SHIFT 28
+};
+
+/* Parameters to RoCE firmware, passed in EDPM doorbell */
+struct db_roce_dpm_params {
+       __le32 params;
+#define DB_ROCE_DPM_PARAMS_SIZE_MASK           0x3F
+#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT          0
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK       0x3
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT      6
+#define DB_ROCE_DPM_PARAMS_OPCODE_MASK         0xFF
+#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT                8
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK       0x7FF
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT      16
+#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK      0x1
+#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT     27
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK 0x1
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
+#define DB_ROCE_DPM_PARAMS_S_FLG_MASK          0x1
+#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT         29
+#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK      0x3
+#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT     30
+};
+
+/* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */
+struct db_roce_dpm_data {
+       __le16 icid;
+       __le16 prod_val;
+       struct db_roce_dpm_params params;
+};
+
 /* Igu interrupt command */
 enum igu_int_cmd {
        IGU_INT_ENABLE  = 0,
@@ -764,6 +1037,19 @@ struct pxp_ptt_entry {
        struct pxp_pretend_cmd  pretend;
 };
 
+/* VF Zone A Permission Register. */
+struct pxp_vf_zone_a_permission {
+       __le32 control;
+#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK     0xFF
+#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT    0
+#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK    0x1
+#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT   8
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK        0x7F
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK        0xFFFF
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16
+};
+
 /* RDIF task context */
 struct rdif_task_context {
        __le32 initial_ref_tag;
@@ -831,6 +1117,7 @@ struct rdif_task_context {
        __le32 reserved2;
 };
 
+/* RSS hash type */
 enum rss_hash_type {
        RSS_HASH_TYPE_DEFAULT   = 0,
        RSS_HASH_TYPE_IPV4      = 1,
@@ -942,7 +1229,7 @@ struct tdif_task_context {
 };
 
 struct timers_context {
-       __le32 logical_client0;
+       __le32 logical_client_0;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC0_MASK              0x1
@@ -951,7 +1238,7 @@ struct timers_context {
 #define TIMERS_CONTEXT_ACTIVELC0_SHIFT            29
 #define TIMERS_CONTEXT_RESERVED0_MASK             0x3
 #define TIMERS_CONTEXT_RESERVED0_SHIFT            30
-       __le32 logical_client1;
+       __le32 logical_client_1;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC1_MASK              0x1
@@ -960,7 +1247,7 @@ struct timers_context {
 #define TIMERS_CONTEXT_ACTIVELC1_SHIFT            29
 #define TIMERS_CONTEXT_RESERVED1_MASK             0x3
 #define TIMERS_CONTEXT_RESERVED1_SHIFT            30
-       __le32 logical_client2;
+       __le32 logical_client_2;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC2_MASK              0x1
@@ -978,3 +1265,4 @@ struct timers_context {
 #define TIMERS_CONTEXT_RESERVED3_SHIFT            29
 };
 #endif /* __COMMON_HSI__ */
+#endif
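
A minimal sketch of how the regpair helpers hoisted into this header are typically used, assuming the qed struct regpair layout of two __le32 words (lo, hi):

#include <linux/qed/common_hsi.h>

static void example_fill_addr(struct regpair *dst, dma_addr_t addr)
{
        DMA_REGPAIR_LE(*dst, addr);     /* hi/lo = upper/lower 32 bits, LE */
}

static dma_addr_t example_read_addr(const struct regpair *src)
{
        return HILO_DMA_REGPAIR((*src));        /* recombine to dma_addr_t */
}
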
index b5ebc69..1aa0727 100644 (file)
 /* ETH FW CONSTANTS */
 /********************/
 #define ETH_HSI_VER_MAJOR                   3
-#define ETH_HSI_VER_MINOR                   0
-#define ETH_CACHE_LINE_SIZE                 64
+#define ETH_HSI_VER_MINOR      10
+
+#define ETH_HSI_VER_NO_PKT_LEN_TUNN    5
 
+#define ETH_CACHE_LINE_SIZE                 64
+#define ETH_RX_CQE_GAP 32
 #define ETH_MAX_RAMROD_PER_CON                          8
 #define ETH_TX_BD_PAGE_SIZE_BYTES                       4096
 #define ETH_RX_BD_PAGE_SIZE_BYTES                       4096
 
 #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT                          1
 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET                       18
+#define ETH_TX_MAX_BDS_PER_LSO_PACKET  255
 #define ETH_TX_MAX_LSO_HDR_NBD                                          4
 #define ETH_TX_MIN_BDS_PER_LSO_PKT                                      3
 #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT       3
 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT            2
 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE          2
-#define ETH_TX_MAX_NON_LSO_PKT_LEN                  (9700 - (4 + 12 + 8))
+#define ETH_TX_MAX_NON_LSO_PKT_LEN     (9700 - (4 + 4 + 12 + 8))
 #define ETH_TX_MAX_LSO_HDR_BYTES                    510
+#define ETH_TX_LSO_WINDOW_BDS_NUM      (18 - 1)
+#define ETH_TX_LSO_WINDOW_MIN_LEN      9700
+#define ETH_TX_MAX_LSO_PAYLOAD_LEN     0xFE000
+#define ETH_TX_NUM_SAME_AS_LAST_ENTRIES        320
+#define ETH_TX_INACTIVE_SAME_AS_LAST   0xFFFF
 
 #define ETH_NUM_STATISTIC_COUNTERS                      MAX_NUM_VPORTS
+#define ETH_NUM_STATISTIC_COUNTERS_DOUBLE_VF_ZONE \
+       (ETH_NUM_STATISTIC_COUNTERS - MAX_NUM_VFS / 2)
+#define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \
+       (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4)
 
 /* Maximum number of buffers, used for RX packet placement */
 #define ETH_RX_MAX_BUFF_PER_PKT             5
@@ -59,6 +72,8 @@
 #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE    6
 #define ETH_TPA_CQE_END_LEN_LIST_SIZE     4
 
+/* Control frame check constants */
+#define ETH_CTL_FRAME_ETH_TYPE_NUM     4
 
 struct eth_tx_1st_bd_flags {
        u8 bitfields;
@@ -82,10 +97,10 @@ struct eth_tx_1st_bd_flags {
 
 /* The parsing information data for the first tx bd of a given packet. */
 struct eth_tx_data_1st_bd {
-       __le16                          vlan;
-       u8                              nbds;
-       struct eth_tx_1st_bd_flags      bd_flags;
-       __le16                          bitfields;
+       __le16 vlan;
+       u8 nbds;
+       struct eth_tx_1st_bd_flags bd_flags;
+       __le16 bitfields;
 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK  0x1
 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0
 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK          0x1
@@ -96,7 +111,7 @@ struct eth_tx_data_1st_bd {
 
 /* The parsing information data for the second tx bd of a given packet. */
 struct eth_tx_data_2nd_bd {
-       __le16  tunn_ip_size;
+       __le16 tunn_ip_size;
        __le16  bitfields1;
 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK  0xF
 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0
@@ -125,9 +140,14 @@ struct eth_tx_data_2nd_bd {
 #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT                13
 };
 
+/* Firmware data for L2-EDPM packet. */
+struct eth_edpm_fw_data {
+       struct eth_tx_data_1st_bd data_1st_bd;
+       struct eth_tx_data_2nd_bd data_2nd_bd;
+       __le32 reserved;
+};
+
 struct eth_fast_path_cqe_fw_debug {
-       u8 reserved0;
-       u8 reserved1;
        __le16 reserved2;
 };
 
@@ -148,6 +168,17 @@ struct eth_tunnel_parsing_flags {
 #define        ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT     7
 };
 
+/* PMD flow control bits */
+struct eth_pmd_flow_flags {
+       u8 flags;
+#define ETH_PMD_FLOW_FLAGS_VALID_MASK  0x1
+#define ETH_PMD_FLOW_FLAGS_VALID_SHIFT 0
+#define ETH_PMD_FLOW_FLAGS_TOGGLE_MASK 0x1
+#define ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT        1
+#define ETH_PMD_FLOW_FLAGS_RESERVED_MASK 0x3F
+#define ETH_PMD_FLOW_FLAGS_RESERVED_SHIFT 2
+};
+
 /* Regular ETH Rx FP CQE. */
 struct eth_fast_path_rx_reg_cqe {
        u8 type;
@@ -166,64 +197,63 @@ struct eth_fast_path_rx_reg_cqe {
        u8 placement_offset;
        struct eth_tunnel_parsing_flags tunnel_pars_flags;
        u8 bd_num;
-       u8 reserved[7];
+       u8 reserved[9];
        struct eth_fast_path_cqe_fw_debug fw_debug;
        u8 reserved1[3];
-       u8 flags;
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK          0x1
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT         0
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK   0x1
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT  1
-#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK      0x3F
-#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT     2
+       struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-continue ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_cont_cqe {
-       u8      type;
-       u8      tpa_agg_index;
-       __le16  len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
-       u8      reserved[5];
-       u8      reserved1;
-       __le16  reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+       u8 type;
+       u8 tpa_agg_index;
+       __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+       u8 reserved;
+       u8 reserved1;
+       __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+       u8 reserved3[3];
+       struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-end ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_end_cqe {
-       u8      type;
-       u8      tpa_agg_index;
-       __le16  total_packet_len;
-       u8      num_of_bds;
-       u8      end_reason;
-       __le16  num_of_coalesced_segs;
-       __le32  ts_delta;
-       __le16  len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE];
-       u8      reserved1[3];
-       u8      reserved2;
-       __le16  reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+       u8 type;
+       u8 tpa_agg_index;
+       __le16 total_packet_len;
+       u8 num_of_bds;
+       u8 end_reason;
+       __le16 num_of_coalesced_segs;
+       __le32 ts_delta;
+       __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+       __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+       __le16 reserved1;
+       u8 reserved2;
+       struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-start ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_start_cqe {
-       u8      type;
-       u8      bitfields;
+       u8 type;
+       u8 bitfields;
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK  0x7
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0
 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK             0xF
 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT            3
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK      0x1
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT     7
-       __le16  seg_len;
+       __le16 seg_len;
        struct parsing_and_err_flags pars_flags;
-       __le16  vlan_tag;
-       __le32  rss_hash;
-       __le16  len_on_first_bd;
-       u8      placement_offset;
+       __le16 vlan_tag;
+       __le32 rss_hash;
+       __le16 len_on_first_bd;
+       u8 placement_offset;
        struct eth_tunnel_parsing_flags tunnel_pars_flags;
-       u8      tpa_agg_index;
-       u8      header_len;
-       __le16  ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE];
+       u8 tpa_agg_index;
+       u8 header_len;
+       __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE];
        struct eth_fast_path_cqe_fw_debug fw_debug;
+       u8 reserved;
+       struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* The L4 pseudo checksum mode for Ethernet */
@@ -245,15 +275,7 @@ struct eth_slow_path_rx_cqe {
        u8      reserved[25];
        __le16  echo;
        u8      reserved1;
-       u8      flags;
-/* for PMD mode - valid indication */
-#define ETH_SLOW_PATH_RX_CQE_VALID_MASK         0x1
-#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT        0
-/* for PMD mode - valid toggle indication */
-#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK  0x1
-#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1
-#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK     0x3F
-#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT    2
+       struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* union for all ETH Rx CQE types */
@@ -276,6 +298,11 @@ enum eth_rx_cqe_type {
        MAX_ETH_RX_CQE_TYPE
 };
 
+struct eth_rx_pmd_cqe {
+       union eth_rx_cqe cqe;
+       u8 reserved[ETH_RX_CQE_GAP];
+};
+
 enum eth_rx_tunn_type {
        ETH_RX_NO_TUNN,
        ETH_RX_TUNN_GENEVE,
@@ -313,8 +340,8 @@ struct eth_tx_2nd_bd {
 
 /* The parsing information data for the third tx bd of a given packet. */
 struct eth_tx_data_3rd_bd {
-       __le16  lso_mss;
-       __le16  bitfields;
+       __le16 lso_mss;
+       __le16 bitfields;
 #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK  0xF
 #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0
 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK         0xF
@@ -323,8 +350,8 @@ struct eth_tx_data_3rd_bd {
 #define ETH_TX_DATA_3RD_BD_START_BD_SHIFT       8
 #define ETH_TX_DATA_3RD_BD_RESERVED0_MASK       0x7F
 #define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT      9
-       u8      tunn_l4_hdr_start_offset_w;
-       u8      tunn_hdr_size_w;
+       u8 tunn_l4_hdr_start_offset_w;
+       u8 tunn_hdr_size_w;
 };
 
 /* The third tx bd of a given packet */
@@ -355,10 +382,10 @@ struct eth_tx_bd {
 };
 
 union eth_tx_bd_types {
-       struct eth_tx_1st_bd    first_bd;
-       struct eth_tx_2nd_bd    second_bd;
-       struct eth_tx_3rd_bd    third_bd;
-       struct eth_tx_bd        reg_bd;
+       struct eth_tx_1st_bd first_bd;
+       struct eth_tx_2nd_bd second_bd;
+       struct eth_tx_3rd_bd third_bd;
+       struct eth_tx_bd reg_bd;
 };
 
 /* Mstorm Queue Zone */
@@ -389,8 +416,8 @@ struct eth_db_data {
 #define ETH_DB_DATA_RESERVED_SHIFT    5
 #define ETH_DB_DATA_AGG_VAL_SEL_MASK  0x3
 #define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6
-       u8      agg_flags;
-       __le16  bd_prod;
+       u8 agg_flags;
+       __le16 bd_prod;
 };
 
 #endif /* __ETH_COMMON__ */
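
The scattered per-CQE valid/toggle bits above collapse into struct eth_pmd_flow_flags. A hedged sketch of a PMD-style readiness test using the masks it defines; the toggle bookkeeping across ring wraps is elided.

#include <linux/qed/eth_common.h>

static bool example_cqe_ready(const struct eth_pmd_flow_flags *pf,
                              u8 expected_toggle)
{
        u8 valid  = (pf->flags >> ETH_PMD_FLOW_FLAGS_VALID_SHIFT) &
                    ETH_PMD_FLOW_FLAGS_VALID_MASK;
        u8 toggle = (pf->flags >> ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT) &
                    ETH_PMD_FLOW_FLAGS_TOGGLE_MASK;

        return valid && toggle == expected_toggle;
}
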
index b3c0feb..8f64b12 100644 (file)
@@ -311,7 +311,7 @@ struct iscsi_login_req_hdr {
 #define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT  0
 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24
-       __le32 isid_TABC;
+       __le32 isid_tabc;
        __le16 tsih;
        __le16 isid_d;
        __le32 itt;
@@ -464,7 +464,7 @@ struct iscsi_login_response_hdr {
 #define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT  0
 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24
-       __le32 isid_TABC;
+       __le32 isid_tabc;
        __le16 tsih;
        __le16 isid_d;
        __le32 itt;
@@ -688,8 +688,7 @@ union iscsi_cqe {
 enum iscsi_cqes_type {
        ISCSI_CQE_TYPE_SOLICITED = 1,
        ISCSI_CQE_TYPE_UNSOLICITED,
-       ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE
-          ,
+       ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE,
        ISCSI_CQE_TYPE_TASK_CLEANUP,
        ISCSI_CQE_TYPE_DUMMY,
        MAX_ISCSI_CQES_TYPE
@@ -769,9 +768,9 @@ enum iscsi_eqe_opcode {
        ISCSI_EVENT_TYPE_UPDATE_CONN,
        ISCSI_EVENT_TYPE_CLEAR_SQ,
        ISCSI_EVENT_TYPE_TERMINATE_CONN,
+       ISCSI_EVENT_TYPE_MAC_UPDATE_CONN,
        ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE,
        ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE,
-       RESERVED8,
        RESERVED9,
        ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10,
        ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD,
@@ -867,6 +866,7 @@ enum iscsi_ramrod_cmd_id {
        ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4,
        ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5,
        ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6,
+       ISCSI_RAMROD_CMD_ID_MAC_UPDATE = 7,
        MAX_ISCSI_RAMROD_CMD_ID
 };
 
@@ -883,6 +883,16 @@ union iscsi_seq_num {
        __le16 r2t_sn;
 };
 
+struct iscsi_spe_conn_mac_update {
+       struct iscsi_slow_path_hdr hdr;
+       __le16 conn_id;
+       __le32 fw_cid;
+       __le16 remote_mac_addr_lo;
+       __le16 remote_mac_addr_mid;
+       __le16 remote_mac_addr_hi;
+       u8 reserved0[2];
+};
+
 struct iscsi_spe_conn_offload {
        struct iscsi_slow_path_hdr hdr;
        __le16 conn_id;
@@ -1302,14 +1312,6 @@ struct mstorm_iscsi_stats_drv {
        struct regpair iscsi_rx_dropped_pdus_task_not_valid;
 };
 
-struct ooo_opaque {
-       __le32 cid;
-       u8 drop_isle;
-       u8 drop_size;
-       u8 ooo_opcode;
-       u8 ooo_isle;
-};
-
 struct pstorm_iscsi_stats_drv {
        struct regpair iscsi_tx_bytes_cnt;
        struct regpair iscsi_tx_packet_cnt;
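
A hedged sketch of filling the new MAC-update ramrod payload, splitting a 6-byte MAC into the three little-endian 16-bit words; the "hi holds the leading octets" convention is an assumption to verify against the firmware interface.

#include <linux/qed/iscsi_common.h>

static void example_set_remote_mac(struct iscsi_spe_conn_mac_update *spe,
                                   const u8 *mac)
{
        spe->remote_mac_addr_hi  = cpu_to_le16((mac[0] << 8) | mac[1]);
        spe->remote_mac_addr_mid = cpu_to_le16((mac[2] << 8) | mac[3]);
        spe->remote_mac_addr_lo  = cpu_to_le16((mac[4] << 8) | mac[5]);
}
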
index 7e441bd..72d88cf 100644 (file)
 #include <linux/slab.h>
 #include <linux/qed/common_hsi.h>
 
-/* dma_addr_t manip */
-#define DMA_LO_LE(x)            cpu_to_le32(lower_32_bits(x))
-#define DMA_HI_LE(x)            cpu_to_le32(upper_32_bits(x))
-#define DMA_REGPAIR_LE(x, val)  do { \
-                                       (x).hi = DMA_HI_LE((val)); \
-                                       (x).lo = DMA_LO_LE((val)); \
-                               } while (0)
-
-#define HILO_GEN(hi, lo, type)  ((((type)(hi)) << 32) + (lo))
-#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64)
-#define HILO_64_REGPAIR(regpair)        (HILO_64(regpair.hi, regpair.lo))
-#define HILO_DMA_REGPAIR(regpair)      ((dma_addr_t)HILO_64_REGPAIR(regpair))
-
 enum qed_chain_mode {
        /* Each Page contains a next pointer at its end */
        QED_CHAIN_MODE_NEXT_PTR,
index 4475a9d..33c24eb 100644 (file)
@@ -23,6 +23,9 @@ struct qed_dev_eth_info {
 
        u8      port_mac[ETH_ALEN];
        u8      num_vlan_filters;
+
+       /* Legacy VF - this affects the datapath, so qede has to know */
+       bool is_legacy;
 };
 
 struct qed_update_vport_rss_params {
index 3ed7d20..e4546ab 100644 (file)
@@ -318,9 +318,11 @@ struct qed_link_params {
 struct qed_link_output {
        bool    link_up;
 
-       u32     supported_caps;         /* In SUPPORTED defs */
-       u32     advertised_caps;        /* In ADVERTISED defs */
-       u32     lp_caps;                /* In ADVERTISED defs */
+       /* In QED_LM_* defs */
+       u32     supported_caps;
+       u32     advertised_caps;
+       u32     lp_caps;
+
        u32     speed;                  /* In Mb/s */
        u8      duplex;                 /* In DUPLEX defs */
        u8      port;                   /* In PORT defs */
@@ -453,6 +455,10 @@ struct qed_common_ops {
        void            (*simd_handler_clean)(struct qed_dev *cdev,
                                              int index);
 
+       int (*dbg_all_data) (struct qed_dev *cdev, void *buffer);
+
+       int (*dbg_all_data_size) (struct qed_dev *cdev);
+
 /**
  * @brief can_link_change - can the instance change the link or not
  *
index accba0e..dc3889d 100644 (file)
 
 #define TCP_INVALID_TIMEOUT_VAL -1
 
+struct ooo_opaque {
+       __le32 cid;
+       u8 drop_isle;
+       u8 drop_size;
+       u8 ooo_opcode;
+       u8 ooo_isle;
+};
+
 enum tcp_connect_mode {
        TCP_CONNECT_ACTIVE,
        TCP_CONNECT_PASSIVE,
@@ -18,14 +26,10 @@ enum tcp_connect_mode {
 };
 
 struct tcp_init_params {
-       __le32 max_cwnd;
-       __le16 dup_ack_threshold;
+       __le32 two_msl_timer;
        __le16 tx_sws_timer;
-       __le16 min_rto;
-       __le16 min_rto_rt;
-       __le16 max_rto;
        u8 maxfinrt;
-       u8 reserved[1];
+       u8 reserved[9];
 };
 
 enum tcp_ip_version {
index 3eef080..fd82584 100644 (file)
@@ -173,7 +173,7 @@ struct rhashtable_walker {
 struct rhashtable_iter {
        struct rhashtable *ht;
        struct rhash_head *p;
-       struct rhashtable_walker *walker;
+       struct rhashtable_walker walker;
        unsigned int slot;
        unsigned int skip;
 };
@@ -343,11 +343,12 @@ int rhashtable_init(struct rhashtable *ht,
 struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
                                            const void *key,
                                            struct rhash_head *obj,
-                                           struct bucket_table *old_tbl);
+                                           struct bucket_table *old_tbl,
+                                           void **data);
 int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
 
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
-                        gfp_t gfp);
+void rhashtable_walk_enter(struct rhashtable *ht,
+                          struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
@@ -563,8 +564,11 @@ restart:
        return NULL;
 }
 
-/* Internal function, please use rhashtable_insert_fast() instead */
-static inline int __rhashtable_insert_fast(
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in the hash table if
+ * there is a clash, otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
        struct rhashtable *ht, const void *key, struct rhash_head *obj,
        const struct rhashtable_params params)
 {
@@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast(
        spinlock_t *lock;
        unsigned int elasticity;
        unsigned int hash;
+       void *data = NULL;
        int err;
 
 restart:
@@ -601,11 +606,14 @@ restart:
 
        new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
        if (unlikely(new_tbl)) {
-               tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+               tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
                if (!IS_ERR_OR_NULL(tbl))
                        goto slow_path;
 
                err = PTR_ERR(tbl);
+               if (err == -EEXIST)
+                       err = 0;
+
                goto out;
        }
 
@@ -619,25 +627,25 @@ slow_path:
                err = rhashtable_insert_rehash(ht, tbl);
                rcu_read_unlock();
                if (err)
-                       return err;
+                       return ERR_PTR(err);
 
                goto restart;
        }
 
-       err = -EEXIST;
+       err = 0;
        elasticity = ht->elasticity;
        rht_for_each(head, tbl, hash) {
                if (key &&
                    unlikely(!(params.obj_cmpfn ?
                               params.obj_cmpfn(&arg, rht_obj(ht, head)) :
-                              rhashtable_compare(&arg, rht_obj(ht, head)))))
+                              rhashtable_compare(&arg, rht_obj(ht, head))))) {
+                       data = rht_obj(ht, head);
                        goto out;
+               }
                if (!--elasticity)
                        goto slow_path;
        }
 
-       err = 0;
-
        head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
        RCU_INIT_POINTER(obj->next, head);
@@ -652,7 +660,7 @@ out:
        spin_unlock_bh(lock);
        rcu_read_unlock();
 
-       return err;
+       return err ? ERR_PTR(err) : data;
 }
 
 /**
@@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast(
        struct rhashtable *ht, struct rhash_head *obj,
        const struct rhashtable_params params)
 {
-       return __rhashtable_insert_fast(ht, NULL, obj, params);
+       void *ret;
+
+       ret = __rhashtable_insert_fast(ht, NULL, obj, params);
+       if (IS_ERR(ret))
+               return PTR_ERR(ret);
+
+       return ret == NULL ? 0 : -EEXIST;
 }
 
 /**
@@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast(
        const struct rhashtable_params params)
 {
        const char *key = rht_obj(ht, obj);
+       void *ret;
 
        BUG_ON(ht->p.obj_hashfn);
 
-       return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
-                                       params);
+       ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
+       if (IS_ERR(ret))
+               return PTR_ERR(ret);
+
+       return ret == NULL ? 0 : -EEXIST;
 }
 
 /**
@@ -736,6 +754,32 @@ static inline int rhashtable_lookup_insert_fast(
 static inline int rhashtable_lookup_insert_key(
        struct rhashtable *ht, const void *key, struct rhash_head *obj,
        const struct rhashtable_params params)
+{
+       void *ret;
+
+       BUG_ON(!ht->p.obj_hashfn || !key);
+
+       ret = __rhashtable_insert_fast(ht, key, obj, params);
+       if (IS_ERR(ret))
+               return PTR_ERR(ret);
+
+       return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht:                hash table
+ * @obj:       pointer to hash head inside object
+ * @params:    hash table parameters
+ * @data:      pointer to element data already in the hash table
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * existing object if one is already hashed, NULL if it is not and the
+ * insertion was successful, and an ERR_PTR() otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+       struct rhashtable *ht, const void *key, struct rhash_head *obj,
+       const struct rhashtable_params params)
 {
        BUG_ON(!ht->p.obj_hashfn || !key);
 
@@ -906,4 +950,12 @@ static inline int rhashtable_replace_fast(
        return err;
 }
 
+/* Obsolete function, do not use in new code. */
+static inline int rhashtable_walk_init(struct rhashtable *ht,
+                                      struct rhashtable_iter *iter, gfp_t gfp)
+{
+       rhashtable_walk_enter(ht, iter);
+       return 0;
+}
+
 #endif /* _LINUX_RHASHTABLE_H */
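
A minimal sketch of the three outcomes of rhashtable_lookup_get_insert_key(); the object type and params below are assumptions for illustration.

#include <linux/rhashtable.h>

struct example_obj {
        u32 key;
        struct rhash_head node;
};

static const struct rhashtable_params example_params = {
        .key_len     = sizeof(u32),
        .key_offset  = offsetof(struct example_obj, key),
        .head_offset = offsetof(struct example_obj, node),
};

static struct example_obj *example_insert(struct rhashtable *ht,
                                          struct example_obj *new)
{
        void *old = rhashtable_lookup_get_insert_key(ht, &new->key,
                                                     &new->node,
                                                     example_params);

        if (IS_ERR(old))
                return ERR_CAST(old);   /* hard error, e.g. -ENOMEM */
        return old ? old : new;         /* clash returns the old element */
}
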
index 2daece8..57e5484 100644 (file)
@@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb,
                             struct netlink_callback *cb,
                             struct net_device *dev,
                             struct net_device *filter_dev,
-                            int idx);
+                            int *idx);
 extern int ndo_dflt_fdb_add(struct ndmsg *ndm,
                            struct nlattr *tb[],
                            struct net_device *dev,
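
With idx now passed by reference, a driver's dump callback advances the shared cursor itself. A hedged sketch modelled on ndo_dflt_fdb_dump(); the entry list, fill helper, and the cb->args[2] resume convention are assumptions.

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *dev,
                            struct net_device *filter_dev, int *idx)
{
        struct example_fdb_entry *ent;  /* hypothetical driver type */
        int err = 0;

        list_for_each_entry(ent, example_fdb_list(dev), list) {
                if (*idx < cb->args[2]) {       /* dumped on an earlier pass */
                        (*idx)++;
                        continue;
                }
                err = example_fill_entry(skb, ent);     /* hypothetical */
                if (err)
                        break;
                (*idx)++;       /* the cursor persists across devices */
        }
        return err;
}
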
index 923266c..48ec765 100644 (file)
@@ -111,7 +111,6 @@ struct uart_8250_port {
                                                 *   if no_console_suspend
                                                 */
        unsigned char           probe;
-       struct mctrl_gpios      *gpios;
 #define UART_PROBE_RSA (1 << 0)
 
        /*
index 0f665cb..4c5662f 100644 (file)
@@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
  *     @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
 *     @napi_id: id of the NAPI struct this skb came from
  *     @secmark: security marking
- *     @offload_fwd_mark: fwding offload mark
  *     @mark: Generic packet mark
  *     @vlan_proto: vlan encapsulation protocol
  *     @vlan_tci: vlan tag control information
@@ -730,7 +729,10 @@ struct sk_buff {
        __u8                    ipvs_property:1;
        __u8                    inner_protocol_type:1;
        __u8                    remcsum_offload:1;
-       /* 3 or 5 bit hole */
+#ifdef CONFIG_NET_SWITCHDEV
+       __u8                    offload_fwd_mark:1;
+#endif
+       /* 2, 4 or 5 bit hole */
 
 #ifdef CONFIG_NET_SCHED
        __u16                   tc_index;       /* traffic control index */
@@ -757,14 +759,9 @@ struct sk_buff {
                unsigned int    sender_cpu;
        };
 #endif
-       union {
 #ifdef CONFIG_NETWORK_SECMARK
-               __u32           secmark;
-#endif
-#ifdef CONFIG_NET_SWITCHDEV
-               __u32           offload_fwd_mark;
+       __u32           secmark;
 #endif
-       };
 
        union {
                __u32           mark;
@@ -2295,7 +2292,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 
 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
-static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
 {
        if (unlikely(skb_is_nonlinear(skb))) {
                WARN_ON(1);
@@ -2305,6 +2302,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
        skb_set_tail_pointer(skb, len);
 }
 
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+       __skb_set_length(skb, len);
+}
+
 void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -2335,6 +2337,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
        BUG_ON(err);
 }
 
+static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
+{
+       unsigned int diff = len - skb->len;
+
+       if (skb_tailroom(skb) < diff) {
+               int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
+                                          GFP_ATOMIC);
+               if (ret)
+                       return ret;
+       }
+       __skb_set_length(skb, len);
+       return 0;
+}
+
 /**
  *     skb_orphan - orphan a buffer
  *     @skb: buffer to orphan
@@ -2386,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
                kfree_skb(skb);
 }
 
+void skb_rbtree_purge(struct rb_root *root);
+
 void *netdev_alloc_frag(unsigned int fragsz);
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -2938,6 +2956,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
        return __pskb_trim(skb, len);
 }
 
+static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->ip_summed = CHECKSUM_NONE;
+       __skb_trim(skb, len);
+       return 0;
+}
+
+static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->ip_summed = CHECKSUM_NONE;
+       return __skb_grow(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
                for (skb = (queue)->next;                                       \
                     skb != (struct sk_buff *)(queue);                          \
@@ -3726,6 +3759,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
        return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+static inline void skb_gso_reset(struct sk_buff *skb)
+{
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_segs = 0;
+       skb_shinfo(skb)->gso_type = 0;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
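
A minimal sketch combining the new grow/trim helpers into a checksum-aware resize, assuming a caller that owns the skb:

#include <linux/skbuff.h>

static int example_resize(struct sk_buff *skb, unsigned int target_len)
{
        if (skb->len < target_len)
                return __skb_grow_rcsum(skb, target_len); /* may expand head */
        if (skb->len > target_len)
                return __skb_trim_rcsum(skb, target_len); /* invalidates csum */
        return 0;
}
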
index 76199b7..e302c44 100644 (file)
@@ -1,6 +1,16 @@
 #ifndef __SMC91X_H__
 #define __SMC91X_H__
 
+/*
+ * These bits define which access sizes a platform can support, rather
+ * than the maximal access size.  So, if your platform can do 16-bit
+ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
+ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
+ *
+ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
+ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
+ * an invalid configuration.
+ */
 #define SMC91X_USE_8BIT (1 << 0)
 #define SMC91X_USE_16BIT (1 << 1)
 #define SMC91X_USE_32BIT (1 << 2)
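
A hedged sketch of a board file applying the rules above, assuming struct smc91x_platdata carries these flags as in mainline:

#include <linux/smc91x.h>

/* 16- and 32-bit accesses work on this board; 8-bit does not. */
static struct smc91x_platdata example_smc91x_pdata = {
        .flags = SMC91X_USE_16BIT | SMC91X_USE_32BIT,
};
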
index d82cb60..ecc3e07 100644 (file)
@@ -43,6 +43,8 @@ extern int proc_dostring(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
+extern int proc_douintvec(struct ctl_table *, int,
+                        void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
                                void __user *, size_t *, loff_t *);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
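
A minimal sketch wiring an unsigned int knob through the new handler, so values above INT_MAX are accepted and negatives rejected; the table and variable names are illustrative.

#include <linux/sysctl.h>

static unsigned int example_knob;

static struct ctl_table example_table[] = {
        {
                .procname     = "example_knob",
                .data         = &example_knob,
                .maxlen       = sizeof(unsigned int),
                .mode         = 0644,
                .proc_handler = proc_douintvec,
        },
        { }
};
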
index 7be9b12..c723a46 100644 (file)
@@ -281,10 +281,9 @@ struct tcp_sock {
        struct sk_buff* lost_skb_hint;
        struct sk_buff *retransmit_skb_hint;
 
-       /* OOO segments go in this list. Note that socket lock must be held,
-        * as we do not use sk_buff_head lock.
-        */
-       struct sk_buff_head     out_of_order_queue;
+       /* OOO segments go in this rbtree. Socket lock must be held. */
+       struct rb_root  out_of_order_queue;
+       struct sk_buff  *ooo_last_skb; /* cache rb_last(out_of_order_queue) */
 
        /* SACKs data, these 2 need to be together (see tcp_options_write) */
        struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
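
A hedged sketch of walking the new out-of-order rbtree in segment order, assuming sk_buff's rbnode member as used by the TCP stack and the socket lock held per the comment above:

#include <linux/tcp.h>

static void example_walk_ooo(struct tcp_sock *tp)
{
        struct rb_node *p;

        for (p = rb_first(&tp->out_of_order_queue); p; p = rb_next(p)) {
                struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

                (void)skb;      /* inspect; tp->ooo_last_skb caches rb_last() */
        }
}
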
index cbd8990..2b5b10e 100644 (file)
@@ -118,10 +118,11 @@ static inline int arch_within_stack_frames(const void * const stack,
 extern void __check_object_size(const void *ptr, unsigned long n,
                                        bool to_user);
 
-static inline void check_object_size(const void *ptr, unsigned long n,
-                                    bool to_user)
+static __always_inline void check_object_size(const void *ptr, unsigned long n,
+                                             bool to_user)
 {
-       __check_object_size(ptr, n, to_user);
+       if (!__builtin_constant_p(n))
+               __check_object_size(ptr, n, to_user);
 }
 #else
 static inline void check_object_size(const void *ptr, unsigned long n,
index 7b0f886..1061a47 100644 (file)
 #ifndef _NET_RXRPC_H
 #define _NET_RXRPC_H
 
-#include <linux/skbuff.h>
 #include <linux/rxrpc.h>
 
+struct key;
+struct sock;
+struct socket;
 struct rxrpc_call;
 
-/*
- * the mark applied to socket buffers that may be intercepted
- */
-enum rxrpc_skb_mark {
-       RXRPC_SKB_MARK_DATA,            /* data message */
-       RXRPC_SKB_MARK_FINAL_ACK,       /* final ACK received message */
-       RXRPC_SKB_MARK_BUSY,            /* server busy message */
-       RXRPC_SKB_MARK_REMOTE_ABORT,    /* remote abort message */
-       RXRPC_SKB_MARK_LOCAL_ABORT,     /* local abort message */
-       RXRPC_SKB_MARK_NET_ERROR,       /* network error message */
-       RXRPC_SKB_MARK_LOCAL_ERROR,     /* local error message */
-       RXRPC_SKB_MARK_NEW_CALL,        /* local error message */
-};
+typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
+                                 unsigned long);
+typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
+                                       unsigned long);
+typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
+typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
 
-typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long,
-                                   struct sk_buff *);
-void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t);
+void rxrpc_kernel_new_call_notification(struct socket *,
+                                       rxrpc_notify_new_call_t,
+                                       rxrpc_discard_new_call_t);
 struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
                                           struct sockaddr_rxrpc *,
                                           struct key *,
                                           unsigned long,
-                                          gfp_t);
-int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
-void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
-void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
-void rxrpc_kernel_end_call(struct rxrpc_call *);
-bool rxrpc_kernel_is_data_last(struct sk_buff *);
-u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
-int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_free_skb(struct sk_buff *);
-struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
-int rxrpc_kernel_reject_call(struct socket *);
+                                          gfp_t,
+                                          rxrpc_notify_rx_t);
+int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
+                          struct msghdr *, size_t);
+int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
+                          void *, size_t, size_t *, bool, u32 *);
+void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
+                            u32, int, const char *);
+void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
+void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
+                          struct sockaddr_rxrpc *);
+int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
+                              rxrpc_user_attach_call_t, unsigned long, gfp_t);
 
 #endif /* _NET_RXRPC_H */
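
A hedged sketch of a kernel user starting a call under the reworked API: the notify_rx callback replaces the old skb interception, and data is later pulled with rxrpc_kernel_recv_data(). Address and key setup are elided.

#include <net/af_rxrpc.h>

static void example_notify_rx(struct sock *sk, struct rxrpc_call *call,
                              unsigned long call_user_id)
{
        /* wake the consumer; it calls rxrpc_kernel_recv_data() from here */
}

static struct rxrpc_call *example_begin(struct socket *rxsock,
                                        struct sockaddr_rxrpc *srx,
                                        struct key *key)
{
        return rxrpc_kernel_begin_call(rxsock, srx, key, 0 /* user ID */,
                                       GFP_KERNEL, example_notify_rx);
}
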
index 9b4c418..fd60ecc 100644 (file)
@@ -52,7 +52,7 @@ struct unix_sock {
        struct sock             sk;
        struct unix_address     *addr;
        struct path             path;
-       struct mutex            readlock;
+       struct mutex            iolock, bindlock;
        struct sock             *peer;
        struct list_head        link;
        atomic_long_t           inflight;
index 9c23f4d..beb7610 100644 (file)
@@ -1102,6 +1102,7 @@ struct station_info {
        struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
 };
 
+#if IS_ENABLED(CONFIG_CFG80211)
 /**
  * cfg80211_get_station - retrieve information about a given station
  * @dev: the device where the station is supposed to be connected to
@@ -1114,6 +1115,14 @@ struct station_info {
  */
 int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
                         struct station_info *sinfo);
+#else
+static inline int cfg80211_get_station(struct net_device *dev,
+                                      const u8 *mac_addr,
+                                      struct station_info *sinfo)
+{
+       return -ENOENT;
+}
+#endif
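
Because the stub returns -ENOENT when cfg80211 is not built in, callers can probe unconditionally. A minimal sketch; real callers may prefer a heap-allocated station_info given its size.

#include <net/cfg80211.h>

static bool example_station_known(struct net_device *dev, const u8 *mac)
{
        struct station_info sinfo;

        return cfg80211_get_station(dev, mac, &sinfo) == 0;
}
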
 
 /**
  * enum monitor_flags - monitor flags
index c99ffe8..211bd3c 100644 (file)
@@ -50,7 +50,6 @@ struct devlink_sb_pool_info {
 };
 
 struct devlink_ops {
-       size_t priv_size;
        int (*port_type_set)(struct devlink_port *devlink_port,
                             enum devlink_port_type port_type);
        int (*port_split)(struct devlink *devlink, unsigned int port_index,
index 2217a3f..7556646 100644 (file)
@@ -26,6 +26,7 @@ enum dsa_tag_protocol {
        DSA_TAG_PROTO_TRAILER,
        DSA_TAG_PROTO_EDSA,
        DSA_TAG_PROTO_BRCM,
+       DSA_TAG_PROTO_QCA,
        DSA_TAG_LAST,           /* MUST BE LAST */
 };
 
@@ -165,9 +166,9 @@ struct dsa_switch {
        struct dsa_chip_data    *cd;
 
        /*
-        * The used switch driver.
+        * The switch operations.
         */
-       struct dsa_switch_driver        *drv;
+       struct dsa_switch_ops   *ops;
 
        /*
         * An array of which element [a] indicates which port on this
@@ -234,19 +235,21 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds)
 struct switchdev_trans;
 struct switchdev_obj;
 struct switchdev_obj_port_fdb;
+struct switchdev_obj_port_mdb;
 struct switchdev_obj_port_vlan;
 
-struct dsa_switch_driver {
+struct dsa_switch_ops {
        struct list_head        list;
 
-       enum dsa_tag_protocol   tag_protocol;
-
        /*
         * Probing and setup.
         */
        const char      *(*probe)(struct device *dsa_dev,
                                  struct device *host_dev, int sw_addr,
                                  void **priv);
+
+       enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds);
+
        int     (*setup)(struct dsa_switch *ds);
        int     (*set_addr)(struct dsa_switch *ds, u8 *addr);
        u32     (*get_phy_flags)(struct dsa_switch *ds, int port);
@@ -368,17 +371,27 @@ struct dsa_switch_driver {
        int     (*port_fdb_dump)(struct dsa_switch *ds, int port,
                                 struct switchdev_obj_port_fdb *fdb,
                                 int (*cb)(struct switchdev_obj *obj));
+
+       /*
+        * Multicast database
+        */
+       int     (*port_mdb_prepare)(struct dsa_switch *ds, int port,
+                                   const struct switchdev_obj_port_mdb *mdb,
+                                   struct switchdev_trans *trans);
+       void    (*port_mdb_add)(struct dsa_switch *ds, int port,
+                               const struct switchdev_obj_port_mdb *mdb,
+                               struct switchdev_trans *trans);
+       int     (*port_mdb_del)(struct dsa_switch *ds, int port,
+                               const struct switchdev_obj_port_mdb *mdb);
+       int     (*port_mdb_dump)(struct dsa_switch *ds, int port,
+                                struct switchdev_obj_port_mdb *mdb,
+                                int (*cb)(struct switchdev_obj *obj));
 };
 
-void register_switch_driver(struct dsa_switch_driver *type);
-void unregister_switch_driver(struct dsa_switch_driver *type);
+void register_switch_driver(struct dsa_switch_ops *type);
+void unregister_switch_driver(struct dsa_switch_ops *type);
 struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
 
-static inline void *ds_to_priv(struct dsa_switch *ds)
-{
-       return ds->priv;
-}
-
 static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 {
        return dst->rcv != NULL;
@@ -386,4 +399,18 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds, struct device_node *np);
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds);
+int dsa_switch_resume(struct dsa_switch *ds);
+#else
+static inline int dsa_switch_suspend(struct dsa_switch *ds)
+{
+       return 0;
+}
+static inline int dsa_switch_resume(struct dsa_switch *ds)
+{
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #endif
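
The rename to dsa_switch_ops also turns the tag protocol into a callback
rather than a static field. A sketch of what a hypothetical driver's ops
table looks like after this change:

	#include <linux/module.h>
	#include <net/dsa.h>

	static enum dsa_tag_protocol foo_get_tag_protocol(struct dsa_switch *ds)
	{
		return DSA_TAG_PROTO_QCA;	/* the tag protocol added above */
	}

	static struct dsa_switch_ops foo_switch_ops = {
		.get_tag_protocol	= foo_get_tag_protocol,
		/* .probe, .setup, .port_mdb_add, ... as before */
	};

	static int __init foo_switch_init(void)
	{
		register_switch_driver(&foo_switch_ops);
		return 0;
	}
	module_init(foo_switch_init);
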
index 5db9f59..6965c8f 100644 (file)
@@ -112,12 +112,13 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb
        return &dst->u.tun_info;
 }
 
-static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
-                                                __be16 flags,
-                                                __be64 tunnel_id,
-                                                int md_size)
+static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
+                                                   __be32 daddr,
+                                                   __u8 tos, __u8 ttl,
+                                                   __be16 flags,
+                                                   __be64 tunnel_id,
+                                                   int md_size)
 {
-       const struct iphdr *iph = ip_hdr(skb);
        struct metadata_dst *tun_dst;
 
        tun_dst = tun_rx_dst(md_size);
@@ -125,17 +126,30 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
                return NULL;
 
        ip_tunnel_key_init(&tun_dst->u.tun_info.key,
-                          iph->saddr, iph->daddr, iph->tos, iph->ttl,
+                          saddr, daddr, tos, ttl,
                           0, 0, 0, tunnel_id, flags);
        return tun_dst;
 }
 
-static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
+static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
                                                 __be16 flags,
                                                 __be64 tunnel_id,
                                                 int md_size)
 {
-       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+       const struct iphdr *iph = ip_hdr(skb);
+
+       return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+                               flags, tunnel_id, md_size);
+}
+
+static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr,
+                                                     const struct in6_addr *daddr,
+                                                     __u8 tos, __u8 ttl,
+                                                     __be32 label,
+                                                     __be16 flags,
+                                                     __be64 tunnel_id,
+                                                     int md_size)
+{
        struct metadata_dst *tun_dst;
        struct ip_tunnel_info *info;
 
@@ -150,14 +164,26 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
        info->key.tp_src = 0;
        info->key.tp_dst = 0;
 
-       info->key.u.ipv6.src = ip6h->saddr;
-       info->key.u.ipv6.dst = ip6h->daddr;
+       info->key.u.ipv6.src = *saddr;
+       info->key.u.ipv6.dst = *daddr;
 
-       info->key.tos = ipv6_get_dsfield(ip6h);
-       info->key.ttl = ip6h->hop_limit;
-       info->key.label = ip6_flowlabel(ip6h);
+       info->key.tos = tos;
+       info->key.ttl = ttl;
+       info->key.label = label;
 
        return tun_dst;
 }
 
+static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
+                                                  __be16 flags,
+                                                  __be64 tunnel_id,
+                                                  int md_size)
+{
+       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+       return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr,
+                                 ipv6_get_dsfield(ip6h), ip6h->hop_limit,
+                                 ip6_flowlabel(ip6h), flags, tunnel_id,
+                                 md_size);
+}
 #endif /* __NET_DST_METADATA_H */
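
Splitting header parsing out of ip_tun_rx_dst() means callers that obtain
the outer addresses elsewhere can build the metadata dst directly. A
sketch, with saddr/daddr/tos/ttl and tunnel_id assumed to be in scope:

	struct metadata_dst *tun_dst;

	tun_dst = __ip_tun_set_dst(saddr, daddr, tos, ttl,
				   TUNNEL_KEY, tunnel_id, 0);
	if (!tun_dst)
		return -ENOMEM;
	skb_dst_set(skb, &tun_dst->dst);
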
index d47ef4b..035aa77 100644 (file)
@@ -34,8 +34,7 @@ struct flowi_common {
        __u8    flowic_flags;
 #define FLOWI_FLAG_ANYSRC              0x01
 #define FLOWI_FLAG_KNOWN_NH            0x02
-#define FLOWI_FLAG_L3MDEV_SRC          0x04
-#define FLOWI_FLAG_SKIP_NH_OIF         0x08
+#define FLOWI_FLAG_SKIP_NH_OIF         0x04
        __u32   flowic_secid;
        struct flowi_tunnel flowic_tun_key;
 };
index d3d60dc..d953492 100644 (file)
@@ -32,8 +32,13 @@ struct flow_dissector_key_basic {
 };
 
 struct flow_dissector_key_tags {
-       u32     vlan_id:12,
-               flow_label:20;
+       u32     flow_label;
+};
+
+struct flow_dissector_key_vlan {
+       u16     vlan_id:12,
+               vlan_priority:3;
+       u16     padding;
 };
 
 struct flow_dissector_key_keyid {
@@ -119,7 +124,7 @@ enum flow_dissector_key_id {
        FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
        FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
        FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
-       FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */
+       FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */
        FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
        FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
        FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
@@ -148,6 +153,7 @@ struct flow_keys {
 #define FLOW_KEYS_HASH_START_FIELD basic
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_tags tags;
+       struct flow_dissector_key_vlan vlan;
        struct flow_dissector_key_keyid keyid;
        struct flow_dissector_key_ports ports;
        struct flow_dissector_key_addrs addrs;
@@ -177,7 +183,7 @@ struct flow_keys_digest {
 void make_flow_keys_digest(struct flow_keys_digest *digest,
                           const struct flow_keys *flow);
 
-static inline bool flow_keys_have_l4(struct flow_keys *keys)
+static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 {
        return (keys->ports.ports || keys->tags.flow_label);
 }
index 4079fc1..7d4a72e 100644 (file)
@@ -111,6 +111,7 @@ struct fib_info {
        unsigned char           fib_scope;
        unsigned char           fib_type;
        __be32                  fib_prefsrc;
+       u32                     fib_tb_id;
        u32                     fib_priority;
        u32                     *fib_metrics;
 #define fib_mtu fib_metrics[RTAX_MTU-1]
@@ -319,7 +320,7 @@ void fib_flush_external(struct net *net);
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
-int fib_sync_down_addr(struct net *net, __be32 local);
+int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 extern u32 fib_multipath_secret __read_mostly;
index a5e7035..e598c63 100644 (file)
@@ -222,6 +222,25 @@ static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
        return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
 }
 
+static inline __be64 key32_to_tunnel_id(__be32 key)
+{
+#ifdef __BIG_ENDIAN
+       return (__force __be64)key;
+#else
+       return (__force __be64)((__force u64)key << 32);
+#endif
+}
+
+/* Returns the least-significant 32 bits of a __be64. */
+static inline __be32 tunnel_id_to_key32(__be64 tun_id)
+{
+#ifdef __BIG_ENDIAN
+       return (__force __be32)tun_id;
+#else
+       return (__force __be32)((__force u64)tun_id >> 32);
+#endif
+}
+
 #ifdef CONFIG_INET
 
 int ip_tunnel_init(struct net_device *dev);
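
These helpers centralise what vxlan.h carried privately (its copies are
removed later in this diff): a 32-bit on-wire key lives in whichever half
of the __be64 keeps network byte order intact. A round-trip sanity check:

	__be32 key = htonl(0x123456);		/* e.g. a VNI or GRE key */
	__be64 tun_id = key32_to_tunnel_id(key);

	WARN_ON(tunnel_id_to_key32(tun_id) != key);	/* holds on both byte orders */
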
index e900950..3832099 100644 (file)
@@ -11,6 +11,7 @@
 #ifndef _NET_L3MDEV_H_
 #define _NET_L3MDEV_H_
 
+#include <net/dst.h>
 #include <net/fib_rules.h>
 
 /**
  *
  * @l3mdev_fib_table: Get FIB table id to use for lookups
  *
- * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ * @l3mdev_l3_rcv:    Hook in L3 receive path
  *
- * @l3mdev_get_saddr: Get source address for a flow
+ * @l3mdev_l3_out:    Hook in L3 output path
  *
- * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
+ * @l3mdev_link_scope_lookup: IPv6 lookup for link-local and multicast destinations
  */
 
 struct l3mdev_ops {
        u32             (*l3mdev_fib_table)(const struct net_device *dev);
        struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
                                          struct sk_buff *skb, u16 proto);
-
-       /* IPv4 ops */
-       struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
-                                            const struct flowi4 *fl4);
-       int             (*l3mdev_get_saddr)(struct net_device *dev,
-                                           struct flowi4 *fl4);
+       struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
+                                         struct sock *sk, struct sk_buff *skb,
+                                         u16 proto);
 
        /* IPv6 ops */
-       struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
+       struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev,
                                                 struct flowi6 *fl6);
-       int                (*l3mdev_get_saddr6)(struct net_device *dev,
-                                               const struct sock *sk,
-                                               struct flowi6 *fl6);
 };
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
@@ -49,6 +44,8 @@ struct l3mdev_ops {
 int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
                          struct fib_lookup_arg *arg);
 
+void l3mdev_update_flow(struct net *net, struct flowi *fl);
+
 int l3mdev_master_ifindex_rcu(const struct net_device *dev);
 static inline int l3mdev_master_ifindex(struct net_device *dev)
 {
@@ -80,7 +77,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
 }
 
 static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
 {
        /* netdev_master_upper_dev_get_rcu calls
         * list_first_or_null_rcu to walk the upper dev list.
@@ -89,7 +86,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
         * typecast to remove the const
         */
        struct net_device *dev = (struct net_device *)_dev;
-       const struct net_device *master;
+       struct net_device *master;
 
        if (!dev)
                return NULL;
@@ -104,26 +101,6 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
        return master;
 }
 
-/* get index of an interface to use for FIB lookups. For devices
- * enslaved to an L3 master device FIB lookups are based on the
- * master index
- */
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
-       return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
-}
-
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
-       int oif;
-
-       rcu_read_lock();
-       oif = l3mdev_fib_oif_rcu(dev);
-       rcu_read_unlock();
-
-       return oif;
-}
-
 u32 l3mdev_fib_table_rcu(const struct net_device *dev);
 u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
 static inline u32 l3mdev_fib_table(const struct net_device *dev)
@@ -137,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev)
        return tb_id;
 }
 
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
-                                              const struct flowi4 *fl4)
-{
-       if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
-               return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
-
-       return NULL;
-}
-
 static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
 {
        struct net_device *dev;
@@ -165,11 +133,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
        return rc;
 }
 
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
-
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-                     struct flowi6 *fl6);
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6);
 
 static inline
 struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -199,6 +163,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
        return l3mdev_l3_rcv(skb, AF_INET6);
 }
 
+static inline
+struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
+{
+       struct net_device *dev = skb_dst(skb)->dev;
+
+       if (netif_is_l3_slave(dev)) {
+               struct net_device *master;
+
+               master = netdev_master_upper_dev_get_rcu(dev);
+               if (master && master->l3mdev_ops->l3mdev_l3_out)
+                       skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
+                                                               skb, proto);
+       }
+
+       return skb;
+}
+
+static inline
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
+{
+       return l3mdev_l3_out(sk, skb, AF_INET);
+}
+
+static inline
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
+{
+       return l3mdev_l3_out(sk, skb, AF_INET6);
+}
 #else
 
 static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -216,20 +208,11 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
 }
 
 static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
 {
        return NULL;
 }
 
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
-       return dev ? dev->ifindex : 0;
-}
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
-       return dev ? dev->ifindex : 0;
-}
-
 static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
 {
        return 0;
@@ -243,43 +226,37 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
        return 0;
 }
 
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
-                                              const struct flowi4 *fl4)
-{
-       return NULL;
-}
-
 static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
 {
        return false;
 }
 
-static inline int l3mdev_get_saddr(struct net *net, int ifindex,
-                                  struct flowi4 *fl4)
+static inline
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6)
 {
-       return 0;
+       return NULL;
 }
 
 static inline
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
+struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
 {
-       return NULL;
+       return skb;
 }
 
-static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-                                   struct flowi6 *fl6)
+static inline
+struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
 {
-       return 0;
+       return skb;
 }
 
 static inline
-struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
 {
        return skb;
 }
 
 static inline
-struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
 {
        return skb;
 }
@@ -290,6 +267,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
 {
        return 1;
 }
+static inline
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+}
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
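
l3mdev_ip_out()/l3mdev_ip6_out() mirror the existing receive hooks: the
output path offers the skb to the master device (e.g. a VRF), which may
modify, requeue or consume it. A sketch of the intended call-site shape
(the actual hook placement in the IPv4/IPv6 local-out paths is not part
of this header):

	skb = l3mdev_ip_out(sk, skb);
	if (unlikely(!skb))
		return 0;	/* assumed: consumed by the l3mdev driver */

	/* ... continue down the normal output path ... */
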
index e9f116e..ea3f80f 100644 (file)
 /* lw tunnel state flags */
 #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0)
 #define LWTUNNEL_STATE_INPUT_REDIRECT  BIT(1)
+#define LWTUNNEL_STATE_XMIT_REDIRECT   BIT(2)
+
+enum {
+       LWTUNNEL_XMIT_DONE,
+       LWTUNNEL_XMIT_CONTINUE,
+};
+
 
 struct lwtunnel_state {
        __u16           type;
@@ -21,6 +28,7 @@ struct lwtunnel_state {
        int             (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb);
        int             (*orig_input)(struct sk_buff *);
        int             len;
+       __u16           headroom;
        __u8            data[0];
 };
 
@@ -34,6 +42,7 @@ struct lwtunnel_encap_ops {
                          struct lwtunnel_state *lwtstate);
        int (*get_encap_size)(struct lwtunnel_state *lwtstate);
        int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
+       int (*xmit)(struct sk_buff *skb);
 };
 
 #ifdef CONFIG_LWTUNNEL
@@ -75,6 +84,24 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
 
        return false;
 }
+
+static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
+{
+       if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT))
+               return true;
+
+       return false;
+}
+
+static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
+                                            unsigned int mtu)
+{
+       if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu)
+               return lwtstate->headroom;
+
+       return 0;
+}
+
 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
                           unsigned int num);
 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
@@ -90,6 +117,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
+int lwtunnel_xmit(struct sk_buff *skb);
 
 #else
 
@@ -117,6 +145,17 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
        return false;
 }
 
+static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
+{
+       return false;
+}
+
+static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
+                                            unsigned int mtu)
+{
+       return 0;
+}
+
 static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
                                         unsigned int num)
 {
@@ -170,6 +209,11 @@ static inline int lwtunnel_input(struct sk_buff *skb)
        return -EOPNOTSUPP;
 }
 
+static inline int lwtunnel_xmit(struct sk_buff *skb)
+{
+       return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_LWTUNNEL */
 
 #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type))
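
lwtunnel_headroom() exists so the core can budget for the encapsulation
that the new lwtunnel_xmit() hook will prepend. A sketch of an MTU
computation that accounts for it (dst and mtu assumed in scope):

	unsigned int headroom;

	/* zero unless the state redirects xmit and its headroom fits */
	headroom = lwtunnel_headroom(dst->lwtstate, mtu);
	mtu -= headroom;
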
index 445b019..5041805 100644 (file)
@@ -42,7 +42,6 @@ union nf_conntrack_expect_proto {
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
-#include <linux/timer.h>
 
 #ifdef CONFIG_NETFILTER_DEBUG
 #define NF_CT_ASSERT(x)                WARN_ON(!(x))
@@ -73,7 +72,7 @@ struct nf_conn_help {
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
 struct nf_conn {
-       /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+       /* Usage count in here is 1 for hash table, 1 per skb,
         * plus 1 for any connection(s) we are `master' for
         *
         * Hint, SKB address this struct and refcnt via skb->nfct and
@@ -96,8 +95,8 @@ struct nf_conn {
        /* Have we seen traffic both ways yet? (bitset) */
        unsigned long status;
 
-       /* Timer function; drops refcnt when it goes off. */
-       struct timer_list timeout;
+       /* jiffies32 when this ct is considered dead */
+       u32 timeout;
 
        possible_net_t ct_net;
 
@@ -220,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
        __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
-bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-                      const struct sk_buff *skb, int do_acct);
-
 /* kill conntrack and do accounting */
-static inline bool nf_ct_kill_acct(struct nf_conn *ct,
-                                  enum ip_conntrack_info ctinfo,
-                                  const struct sk_buff *skb)
-{
-       return __nf_ct_kill_acct(ct, ctinfo, skb, 1);
-}
+bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+                    const struct sk_buff *skb);
 
 /* kill conntrack without accounting */
 static inline bool nf_ct_kill(struct nf_conn *ct)
 {
-       return __nf_ct_kill_acct(ct, 0, NULL, 0);
+       return nf_ct_delete(ct, 0, 0);
 }
 
 /* These are for NAT.  Icky. */
@@ -291,21 +283,55 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
        return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
 }
 
+#define nfct_time_stamp ((u32)(jiffies))
+
 /* jiffies until ct expires, 0 if already expired */
 static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
 {
-       long timeout = (long)ct->timeout.expires - (long)jiffies;
+       s32 timeout = ct->timeout - nfct_time_stamp;
 
        return timeout > 0 ? timeout : 0;
 }
 
+static inline bool nf_ct_is_expired(const struct nf_conn *ct)
+{
+       return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+}
+
+/* use after obtaining a reference count */
+static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+{
+       return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
+              !nf_ct_is_dying(ct);
+}
+
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 int nf_conntrack_hash_resize(unsigned int hashsize);
+
+extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
+extern seqcount_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
 
+/* must be called with rcu read lock held */
+static inline void
+nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+       struct hlist_nulls_head *hptr;
+       unsigned int sequence, hsz;
+
+       do {
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
+               hsz = nf_conntrack_htable_size;
+               hptr = nf_conntrack_hash;
+       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+       *hash = hptr;
+       *hsize = hsz;
+}
+
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                 const struct nf_conntrack_zone *zone,
                                 gfp_t flags);
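
Dropping the per-conntrack struct timer_list in favour of a jiffies32
stamp leans on the classic signed-difference comparison, which stays
correct across the u32 wrap. Illustrative only, mirroring the new
nf_ct_is_expired():

	u32 deadline = ct->timeout;	/* stamp set at refresh time */
	u32 now = nfct_time_stamp;

	/* like time_after(): the signed difference survives the wrap */
	bool expired = (s32)(deadline - now) <= 0;
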
index 79d7ac5..62e17d1 100644 (file)
@@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
                        const struct nf_conntrack_l3proto *l3proto,
                        const struct nf_conntrack_l4proto *l4proto);
 
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
-
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net,
@@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
 
 #define CONNTRACK_LOCKS 1024
 
-extern struct hlist_nulls_head *nf_conntrack_hash;
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);
 
index fa36447..12d967b 100644 (file)
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
+enum nf_ct_ecache_state {
+       NFCT_ECACHE_UNKNOWN,            /* destroy event not sent */
+       NFCT_ECACHE_DESTROY_FAIL,       /* tried but failed to send destroy event */
+       NFCT_ECACHE_DESTROY_SENT,       /* sent destroy event after failure */
+};
+
 struct nf_conntrack_ecache {
-       unsigned long cache;    /* bitops want long */
-       unsigned long missed;   /* missed events */
-       u16 ctmask;             /* bitmask of ct events to be delivered */
-       u16 expmask;            /* bitmask of expect events to be delivered */
-       u32 portid;             /* netlink portid of destroyer */
+       unsigned long cache;            /* bitops want long */
+       unsigned long missed;           /* missed events */
+       u16 ctmask;                     /* bitmask of ct events to be delivered */
+       u16 expmask;                    /* bitmask of expect events to be delivered */
+       u32 portid;                     /* netlink portid of destroyer */
+       enum nf_ct_ecache_state state;  /* ecache state */
 };
 
 static inline struct nf_conntrack_ecache *
index 1a5fb36..de629f1 100644 (file)
@@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);
 
-static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
-{
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       kfree(pn->ctl_compat_table);
-       pn->ctl_compat_table = NULL;
-#endif
-}
-
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
                               const struct nf_conntrack_tuple *tuple);
index 83d855b..ee07dc8 100644 (file)
@@ -60,8 +60,7 @@ struct nf_logger {
 int nf_log_register(u_int8_t pf, struct nf_logger *logger);
 void nf_log_unregister(struct nf_logger *logger);
 
-void nf_log_set(struct net *net, u_int8_t pf,
-               const struct nf_logger *logger);
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger);
 void nf_log_unset(struct net *net, const struct nf_logger *logger);
 
 int nf_log_bind_pf(struct net *net, u_int8_t pf,
index f2f1339..8972468 100644 (file)
@@ -251,7 +251,8 @@ struct nft_set_ops {
 
        int                             (*insert)(const struct net *net,
                                                  const struct nft_set *set,
-                                                 const struct nft_set_elem *elem);
+                                                 const struct nft_set_elem *elem,
+                                                 struct nft_set_ext **ext);
        void                            (*activate)(const struct net *net,
                                                    const struct nft_set *set,
                                                    const struct nft_set_elem *elem);
index d27588c..1139cde 100644 (file)
@@ -36,4 +36,8 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 void nft_meta_set_destroy(const struct nft_ctx *ctx,
                          const struct nft_expr *expr);
 
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+                         const struct nft_expr *expr,
+                         const struct nft_data **data);
+
 #endif
index 60fa153..02e28c5 100644 (file)
@@ -8,6 +8,10 @@ struct nft_reject {
 
 extern const struct nla_policy nft_reject_policy[];
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+                       const struct nft_expr *expr,
+                       const struct nft_data **data);
+
 int nft_reject_init(const struct nft_ctx *ctx,
                    const struct nft_expr *expr,
                    const struct nlattr * const tb[]);
index 38b1a80..e469e85 100644 (file)
@@ -15,10 +15,6 @@ struct nf_proto_net {
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *ctl_table_header;
        struct ctl_table        *ctl_table;
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       struct ctl_table_header *ctl_compat_header;
-       struct ctl_table        *ctl_compat_table;
-#endif
 #endif
        unsigned int            users;
 };
@@ -58,10 +54,6 @@ struct nf_ip_net {
        struct nf_udp_net       udp;
        struct nf_icmp_net      icmp;
        struct nf_icmp_net      icmpv6;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       struct ctl_table_header *ctl_table_header;
-       struct ctl_table        *ctl_table;
-#endif
 };
 
 struct ct_pcpu {
index 24cd394..27bb963 100644 (file)
@@ -11,7 +11,7 @@
 struct ctl_table_header;
 
 struct xfrm_policy_hash {
-       struct hlist_head       *table;
+       struct hlist_head       __rcu *table;
        unsigned int            hmask;
        u8                      dbits4;
        u8                      sbits4;
@@ -38,14 +38,12 @@ struct netns_xfrm {
         * mode. Also, it can be used by ah/esp icmp error handler to find
         * offending SA.
         */
-       struct hlist_head       *state_bydst;
-       struct hlist_head       *state_bysrc;
-       struct hlist_head       *state_byspi;
+       struct hlist_head       __rcu *state_bydst;
+       struct hlist_head       __rcu *state_bysrc;
+       struct hlist_head       __rcu *state_byspi;
        unsigned int            state_hmask;
        unsigned int            state_num;
        struct work_struct      state_hash_work;
-       struct hlist_head       state_gc_list;
-       struct work_struct      state_gc_work;
 
        struct list_head        policy_all;
        struct hlist_head       *policy_byidx;
@@ -73,7 +71,7 @@ struct netns_xfrm {
        struct dst_ops          xfrm6_dst_ops;
 #endif
        spinlock_t xfrm_state_lock;
-       rwlock_t xfrm_policy_lock;
+       spinlock_t xfrm_policy_lock;
        struct mutex xfrm_cfg_mutex;
 
        /* flow cache part */
index c99508d..a459be5 100644 (file)
@@ -69,17 +69,19 @@ struct tcf_exts {
        int police;
 };
 
-static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
+static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
        exts->type = 0;
        exts->nr_actions = 0;
        exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
                                GFP_KERNEL);
-       WARN_ON(!exts->actions); /* TODO: propagate the error to callers */
+       if (!exts->actions)
+               return -ENOMEM;
 #endif
        exts->action = action;
        exts->police = police;
+       return 0;
 }
 
 /**
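
Since tcf_exts_init() can now fail, every classifier has to propagate the
error instead of trusting the old WARN_ON(). The updated caller pattern,
with hypothetical attribute names:

	struct tcf_exts e;
	int err;

	err = tcf_exts_init(&e, TCA_FOO_ACT, TCA_FOO_POLICE);
	if (err < 0)
		return err;
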
index ad777d7..0429d47 100644 (file)
@@ -29,7 +29,6 @@
 #include <net/flow.h>
 #include <net/inet_sock.h>
 #include <net/ip_fib.h>
-#include <net/l3mdev.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
@@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
        ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
                              sport, dport, sk);
 
-       if (!src && oif) {
-               int rc;
-
-               rc = l3mdev_get_saddr(net, oif, fl4);
-               if (rc < 0)
-                       return ERR_PTR(rc);
-
-               src = fl4->saddr;
-       }
        if (!dst || !src) {
                rt = __ip_route_output_key(net, fl4);
                if (IS_ERR(rt))
index 0d50177..52a2015 100644 (file)
@@ -592,7 +592,7 @@ static inline void qdisc_qstats_drop(struct Qdisc *sch)
 
 static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
 {
-       qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));
+       this_cpu_inc(sch->cpu_qstats->drops);
 }
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
index ff5be7e..c797c57 100644 (file)
@@ -1020,7 +1020,6 @@ struct proto {
        void                    (*unhash)(struct sock *sk);
        void                    (*rehash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);
-       void                    (*clear_sk)(struct sock *sk, int size);
 
        /* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -1114,6 +1113,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk)
               sk_stream_memory_free(sk);
 }
 
+static inline int sk_under_cgroup_hierarchy(struct sock *sk,
+                                           struct cgroup *ancestor)
+{
+#ifdef CONFIG_SOCK_CGROUP_DATA
+       return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
+                                   ancestor);
+#else
+       return -ENOTSUPP;
+#endif
+}
 
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
@@ -1232,8 +1241,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
        return sk->sk_prot->hash(sk);
 }
 
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
-
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
index fdb3d67..0c28ad9 100644 (file)
@@ -88,10 +88,7 @@ static inline void strp_pause(struct strparser *strp)
 }
 
 /* May be called without holding lock for attached socket */
-static inline void strp_unpause(struct strparser *strp)
-{
-       strp->rx_paused = 0;
-}
+void strp_unpause(struct strparser *strp);
 
 static inline void save_strp_stats(struct strparser *strp,
                                   struct strp_aggr_stats *agg_stats)
@@ -140,6 +137,6 @@ void strp_stop(struct strparser *strp);
 void strp_check_rcv(struct strparser *strp);
 int strp_init(struct strparser *strp, struct sock *csk,
              struct strp_callbacks *cb);
-void strp_tcp_data_ready(struct strparser *strp);
+void strp_data_ready(struct strparser *strp);
 
 #endif /* __NET_STRPARSER_H_ */
index 62f6a96..729fe15 100644 (file)
@@ -222,7 +222,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
                           u16 vid);
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *dev,
-                           struct net_device *filter_dev, int idx);
+                           struct net_device *filter_dev, int *idx);
 void switchdev_port_fwd_mark_set(struct net_device *dev,
                                 struct net_device *group_dev,
                                 bool joining);
@@ -342,15 +342,9 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
                                          struct netlink_callback *cb,
                                          struct net_device *dev,
                                          struct net_device *filter_dev,
-                                         int idx)
-{
-       return idx;
-}
-
-static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
-                                              struct net_device *group_dev,
-                                              bool joining)
+                                         int *idx)
 {
+       return *idx;
 }
 
 static inline bool switchdev_port_same_parent_id(struct net_device *a,
diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
new file mode 100644 (file)
index 0000000..644a211
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __NET_TC_SKBMOD_H
+#define __NET_TC_SKBMOD_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_skbmod.h>
+
+struct tcf_skbmod_params {
+       struct rcu_head rcu;
+       u64     flags; /* up to 64 types of operations; extend if needed */
+       u8      eth_dst[ETH_ALEN];
+       u16     eth_type;
+       u8      eth_src[ETH_ALEN];
+};
+
+struct tcf_skbmod {
+       struct tc_action        common;
+       struct tcf_skbmod_params __rcu *skbmod_p;
+};
+#define to_skbmod(a) ((struct tcf_skbmod *)a)
+
+#endif /* __NET_TC_SKBMOD_H */
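
The params live behind an RCU pointer so the datapath reads them
locklessly while the control path swaps in a new block. A reader sketch
(the action pointer a and skb are assumed in scope; SKBMOD_F_DMAC is
assumed from the UAPI header included above):

	struct tcf_skbmod *d = to_skbmod(a);
	struct tcf_skbmod_params *p;

	rcu_read_lock();
	p = rcu_dereference(d->skbmod_p);
	if (p->flags & SKBMOD_F_DMAC)
		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
	rcu_read_unlock();
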
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
new file mode 100644 (file)
index 0000000..253f8da
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NET_TC_TUNNEL_KEY_H
+#define __NET_TC_TUNNEL_KEY_H
+
+#include <net/act_api.h>
+
+struct tcf_tunnel_key_params {
+       struct rcu_head         rcu;
+       int                     tcft_action;
+       int                     action;
+       struct metadata_dst     *tcft_enc_metadata;
+};
+
+struct tcf_tunnel_key {
+       struct tc_action              common;
+       struct tcf_tunnel_key_params __rcu *params;
+};
+
+#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a)
+
+#endif /* __NET_TC_TUNNEL_KEY_H */
index e29f52e..6b83588 100644 (file)
@@ -20,6 +20,7 @@ struct tcf_vlan {
        int                     tcfv_action;
        u16                     tcfv_push_vid;
        __be16                  tcfv_push_proto;
+       u8                      tcfv_push_prio;
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
index c00e7d5..fdfbedd 100644 (file)
@@ -227,10 +227,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define        TFO_SERVER_COOKIE_NOT_REQD      0x200
 
 /* Force enable TFO on all listeners, i.e., not requiring the
- * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
+ * TCP_FASTOPEN socket option.
  */
 #define        TFO_SERVER_WO_SOCKOPT1  0x400
-#define        TFO_SERVER_WO_SOCKOPT2  0x800
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -604,8 +603,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 void tcp_get_info(struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
-typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
-                               unsigned int, size_t);
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                  sk_read_actor_t recv_actor);
 
@@ -643,7 +640,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
-       if (skb_queue_empty(&tp->out_of_order_queue) &&
+       if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
            tp->rcv_wnd &&
            atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
            !tp->urg_data)
@@ -1164,6 +1161,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
 }
 
 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 
 #undef STATE_TRACE
 
@@ -1523,6 +1521,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
 {
        if (sk->sk_send_head == skb_unlinked)
                sk->sk_send_head = NULL;
+       if (tcp_sk(sk)->highest_sack == skb_unlinked)
+               tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)
@@ -1851,6 +1851,8 @@ static inline int tcp_inq(struct sock *sk)
        return answ;
 }
 
+int tcp_peek_len(struct socket *sock);
+
 static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
 {
        u16 segs_in;
index 8894d71..ea53a87 100644 (file)
@@ -251,6 +251,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
                 int (*saddr_cmp)(const struct sock *,
                                  const struct sock *));
 void udp_err(struct sk_buff *, u32);
+int udp_abort(struct sock *sk, int err);
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udp_push_pending_frames(struct sock *sk);
 void udp_flush_pending_frames(struct sock *sk);
index b96d036..0255613 100644 (file)
@@ -350,24 +350,6 @@ static inline __be32 vxlan_vni_field(__be32 vni)
 #endif
 }
 
-static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
-{
-#if defined(__BIG_ENDIAN)
-       return (__force __be32)tun_id;
-#else
-       return (__force __be32)((__force u64)tun_id >> 32);
-#endif
-}
-
-static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
-{
-#if defined(__BIG_ENDIAN)
-       return (__force __be64)vni;
-#else
-       return (__force __be64)((u64)(__force u32)vni << 32);
-#endif
-}
-
 static inline size_t vxlan_rco_start(__be32 vni_field)
 {
        return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
index adfebd6..d2fdd6d 100644 (file)
@@ -187,7 +187,7 @@ struct xfrm_state {
        struct xfrm_replay_state_esn *preplay_esn;
 
        /* The functions for replay detection. */
-       struct xfrm_replay      *repl;
+       const struct xfrm_replay *repl;
 
        /* internal flag that only holds state for delayed aevent at the
         * moment
index 8e90dd2..e1f9673 100644 (file)
@@ -2115,22 +2115,17 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
                                       size_t len)
 {
        const void __user *p = udata->inbuf + offset;
-       bool ret = false;
+       bool ret;
        u8 *buf;
 
        if (len > USHRT_MAX)
                return false;
 
-       buf = kmalloc(len, GFP_KERNEL);
-       if (!buf)
+       buf = memdup_user(p, len);
+       if (IS_ERR(buf))
                return false;
 
-       if (copy_from_user(buf, p, len))
-               goto free;
-
        ret = !memchr_inv(buf, 0, len);
-
-free:
        kfree(buf);
        return ret;
 }
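
The change above is the general memdup_user() idiom: allocation and copy
collapse into one call that returns an ERR_PTR() on failure, so the goto
label disappears. In isolation:

	void *buf = memdup_user(uptr, len);	/* kmalloc + copy_from_user */

	if (IS_ERR(buf))
		return PTR_ERR(buf);
	/* ... use buf ... */
	kfree(buf);
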
index b201744..fd6eb3a 100644 (file)
@@ -24,6 +24,7 @@ typedef __be32        rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
  */
 struct rxrpc_wire_header {
        __be32          epoch;          /* client boot timestamp */
+#define RXRPC_RANDOM_EPOCH     0x80000000      /* Random if set, date-based if not */
 
        __be32          cid;            /* connection and channel ID */
 #define RXRPC_MAXCALLS         4                       /* max active calls per conn */
@@ -33,8 +34,6 @@ struct rxrpc_wire_header {
 #define RXRPC_CID_INC          (1 << RXRPC_CIDSHIFT)   /* connection ID increment */
 
        __be32          callNumber;     /* call ID (0 for connection-level packets) */
-#define RXRPC_PROCESS_MAXCALLS (1<<2)  /* maximum number of active calls per conn (power of 2) */
-
        __be32          seq;            /* sequence number of pkt in call stream */
        __be32          serial;         /* serial number of pkt sent to network */
 
@@ -92,10 +91,14 @@ struct rxrpc_wire_header {
 struct rxrpc_jumbo_header {
        uint8_t         flags;          /* packet flags (as per rxrpc_header) */
        uint8_t         pad;
-       __be16          _rsvd;          /* reserved (used by kerberos security as cksum) */
+       union {
+               __be16  _rsvd;          /* reserved */
+               __be16  cksum;          /* kerberos security checksum */
+       };
 };
 
 #define RXRPC_JUMBO_DATALEN    1412    /* non-terminal jumbo packet data length */
+#define RXRPC_JUMBO_SUBPKTLEN  (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
 
 /*****************************************************************************/
 /*
@@ -130,6 +133,13 @@ struct rxrpc_ackpacket {
 
 } __packed;
 
+/* Some ACKs refer to specific packets and some are general and can be updated. */
+#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED)       |       \
+                             (1 << RXRPC_ACK_PING_RESPONSE)    |       \
+                             (1 << RXRPC_ACK_DELAY)            |       \
+                             (1 << RXRPC_ACK_IDLE))
+
+
 /*
  * ACK packets can have a further piece of information tagged on the end
  */
index 13c0b2b..73d8709 100644 (file)
@@ -11,12 +11,12 @@ struct sas_rphy;
 struct request;
 
 #if !IS_ENABLED(CONFIG_SCSI_SAS_ATTRS)
-static inline int is_sas_attached(struct scsi_device *sdev)
+static inline int scsi_is_sas_rphy(const struct device *sdev)
 {
        return 0;
 }
 #else
-extern int is_sas_attached(struct scsi_device *sdev);
+extern int scsi_is_sas_rphy(const struct device *);
 #endif
 
 static inline int sas_protocol_ata(enum sas_protocol proto)
@@ -202,7 +202,6 @@ extern int sas_rphy_add(struct sas_rphy *);
 extern void sas_rphy_remove(struct sas_rphy *);
 extern void sas_rphy_delete(struct sas_rphy *);
 extern void sas_rphy_unlink(struct sas_rphy *);
-extern int scsi_is_sas_rphy(const struct device *);
 
 struct sas_port *sas_port_alloc(struct device *, int);
 struct sas_port *sas_port_alloc_num(struct device *);
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
new file mode 100644 (file)
index 0000000..ea3b10e
--- /dev/null
@@ -0,0 +1,153 @@
+/* AF_RXRPC tracepoints
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rxrpc
+
+#if !defined(_TRACE_RXRPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RXRPC_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(rxrpc_call,
+           TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op,
+                    int usage, const void *where, const void *aux),
+
+           TP_ARGS(call, op, usage, where, aux),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,        call            )
+                   __field(int,                        op              )
+                   __field(int,                        usage           )
+                   __field(const void *,               where           )
+                   __field(const void *,               aux             )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call;
+                   __entry->op = op;
+                   __entry->usage = usage;
+                   __entry->where = where;
+                   __entry->aux = aux;
+                          ),
+
+           TP_printk("c=%p %s u=%d sp=%pSR a=%p",
+                     __entry->call,
+                     rxrpc_call_traces[__entry->op],
+                     __entry->usage,
+                     __entry->where,
+                     __entry->aux)
+           );
+
+TRACE_EVENT(rxrpc_skb,
+           TP_PROTO(struct sk_buff *skb, int op, int usage, int mod_count,
+                    const void *where),
+
+           TP_ARGS(skb, op, usage, mod_count, where),
+
+           TP_STRUCT__entry(
+                   __field(struct sk_buff *,           skb             )
+                   __field(int,                        op              )
+                   __field(int,                        usage           )
+                   __field(int,                        mod_count       )
+                   __field(const void *,               where           )
+                            ),
+
+           TP_fast_assign(
+                   __entry->skb = skb;
+                   __entry->op = op;
+                   __entry->usage = usage;
+                   __entry->mod_count = mod_count;
+                   __entry->where = where;
+                          ),
+
+           TP_printk("s=%p %s u=%d m=%d p=%pSR",
+                     __entry->skb,
+                     (__entry->op == 0 ? "NEW" :
+                      __entry->op == 1 ? "SEE" :
+                      __entry->op == 2 ? "GET" :
+                      __entry->op == 3 ? "FRE" :
+                      "PUR"),
+                     __entry->usage,
+                     __entry->mod_count,
+                     __entry->where)
+           );
+
+TRACE_EVENT(rxrpc_rx_packet,
+           TP_PROTO(struct rxrpc_skb_priv *sp),
+
+           TP_ARGS(sp),
+
+           TP_STRUCT__entry(
+                   __field_struct(struct rxrpc_host_header,    hdr             )
+                            ),
+
+           TP_fast_assign(
+                   memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr));
+                          ),
+
+           TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x",
+                     __entry->hdr.epoch, __entry->hdr.cid,
+                     __entry->hdr.callNumber, __entry->hdr.serviceId,
+                     __entry->hdr.serial, __entry->hdr.seq,
+                     __entry->hdr.type, __entry->hdr.flags)
+           );
+
+TRACE_EVENT(rxrpc_rx_done,
+           TP_PROTO(int result, int abort_code),
+
+           TP_ARGS(result, abort_code),
+
+           TP_STRUCT__entry(
+                   __field(int,                        result          )
+                   __field(int,                        abort_code      )
+                            ),
+
+           TP_fast_assign(
+                   __entry->result = result;
+                   __entry->abort_code = abort_code;
+                          ),
+
+           TP_printk("r=%d a=%d", __entry->result, __entry->abort_code)
+           );
+
+TRACE_EVENT(rxrpc_abort,
+           TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq,
+                    int abort_code, int error),
+
+           TP_ARGS(why, cid, call_id, seq, abort_code, error),
+
+           TP_STRUCT__entry(
+                   __array(char,                       why, 4          )
+                   __field(u32,                        cid             )
+                   __field(u32,                        call_id         )
+                   __field(rxrpc_seq_t,                seq             )
+                   __field(int,                        abort_code      )
+                   __field(int,                        error           )
+                            ),
+
+           TP_fast_assign(
+                   memcpy(__entry->why, why, 4);
+                   __entry->cid = cid;
+                   __entry->call_id = call_id;
+                   __entry->abort_code = abort_code;
+                   __entry->error = error;
+                   __entry->seq = seq;
+                          ),
+
+           TP_printk("%08x:%08x s=%u a=%d e=%d %s",
+                     __entry->cid, __entry->call_id, __entry->seq,
+                     __entry->abort_code, __entry->error, __entry->why)
+           );
+
+#endif /* _TRACE_RXRPC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 185f8ea..d0352a9 100644 (file)
@@ -71,6 +71,7 @@ header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf_common.h
+header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
index 9c9c6ad..5cd4d4d 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
+#include <linux/time.h>
 
 #define ZATM_GETPOOL   _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
                                                /* get pool statistics */
index 866d53c..f896dfa 100644 (file)
@@ -95,6 +95,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_SCHED_ACT,
        BPF_PROG_TYPE_TRACEPOINT,
        BPF_PROG_TYPE_XDP,
+       BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD      1
@@ -386,6 +387,17 @@ enum bpf_func_id {
         */
        BPF_FUNC_current_task_under_cgroup,
 
+       /**
+        * bpf_skb_change_tail(skb, len, flags)
+        * The helper will resize the skb to the given new size,
+        * to be used f.e. with control messages.
+        * @skb: pointer to skb
+        * @len: new skb length
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_tail,
+
        __BPF_FUNC_MAX_ID,
 };
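
Per the helper description above, a program trims the skb tail like this
(a hypothetical tc/BPF fragment; the bpf_skb_change_tail() declaration
comes from the program's helper stubs, not this header):

	/* shrink the packet to new_len, e.g. before a control message */
	if (bpf_skb_change_tail(skb, new_len, 0))
		return TC_ACT_SHOT;	/* hypothetical failure policy */
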
 
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
new file mode 100644 (file)
index 0000000..0674272
--- /dev/null
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+       struct pt_regs regs;
+       __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
index b8f38e8..099a420 100644 (file)
@@ -1362,7 +1362,14 @@ enum ethtool_link_mode_bit_indices {
        ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT        = 37,
        ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT        = 38,
        ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT    = 39,
-       ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT         = 40,
+       ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT         = 40,
+       ETHTOOL_LINK_MODE_1000baseX_Full_BIT    = 41,
+       ETHTOOL_LINK_MODE_10000baseCR_Full_BIT  = 42,
+       ETHTOOL_LINK_MODE_10000baseSR_Full_BIT  = 43,
+       ETHTOOL_LINK_MODE_10000baseLR_Full_BIT  = 44,
+       ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45,
+       ETHTOOL_LINK_MODE_10000baseER_Full_BIT  = 46,
+
 
        /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
         * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1371,7 +1378,7 @@ enum ethtool_link_mode_bit_indices {
         */
 
        __ETHTOOL_LINK_MODE_LAST
-         = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+         = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)     \
index c186f64..ab92bca 100644 (file)
@@ -140,7 +140,7 @@ struct bridge_vlan_xstats {
        __u64 tx_bytes;
        __u64 tx_packets;
        __u16 vid;
-       __u16 pad1;
+       __u16 flags;
        __u32 pad2;
 };
 
index a1b5202..9bf3aec 100644 (file)
@@ -318,6 +318,7 @@ enum {
        IFLA_BRPORT_FLUSH,
        IFLA_BRPORT_MULTICAST_ROUTER,
        IFLA_BRPORT_PAD,
+       IFLA_BRPORT_MCAST_FLOOD,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
index 163e8ad..4bd1f55 100644 (file)
@@ -16,7 +16,8 @@
 #define _UAPI__LINUX_IF_PPPOL2TP_H
 
 #include <linux/types.h>
-
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* Structure used to connect() the socket to a particular tunnel UDP
  * socket over IPv4.
index e128769..d37bbb1 100644 (file)
 #include <asm/byteorder.h>
 
 #include <linux/socket.h>
+#include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_pppol2tp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
index 361b9f0..9865c8c 100644 (file)
@@ -2,6 +2,9 @@
 #define _UAPI_IF_TUNNEL_H_
 
 #include <linux/types.h>
+#include <linux/if.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
 #include <asm/byteorder.h>
 
 
index abbd1dc..b5c366f 100644 (file)
@@ -73,6 +73,7 @@ enum {
        INET_DIAG_BC_S_COND,
        INET_DIAG_BC_D_COND,
        INET_DIAG_BC_DEV_COND,   /* u32 ifindex */
+       INET_DIAG_BC_MARK_COND,
 };
 
 struct inet_diag_hostcond {
@@ -82,6 +83,11 @@ struct inet_diag_hostcond {
        __be32  addr[0];
 };
 
+struct inet_diag_markcond {
+       __u32 mark;
+       __u32 mask;
+};
+
 /* Base info structure. It contains socket identity (addrs/ports/cookie)
  * and, alas, the information shown by netstat. */
 struct inet_diag_msg {
@@ -117,6 +123,7 @@ enum {
        INET_DIAG_LOCALS,
        INET_DIAG_PEERS,
        INET_DIAG_PAD,
+       INET_DIAG_MARK,
        __INET_DIAG_MAX,
 };
 
index 3d48014..30f031d 100644 (file)
@@ -1,11 +1,13 @@
 #ifndef _IPX_H_
 #define _IPX_H_
+#include <linux/libc-compat.h> /* for compatibility with glibc netipx/ipx.h */
 #include <linux/types.h>
 #include <linux/sockios.h>
 #include <linux/socket.h>
 #define IPX_NODE_LEN   6
 #define IPX_MTU                576
 
+#if __UAPI_DEF_SOCKADDR_IPX
 struct sockaddr_ipx {
        __kernel_sa_family_t sipx_family;
        __be16          sipx_port;
@@ -14,6 +16,7 @@ struct sockaddr_ipx {
        __u8            sipx_type;
        unsigned char   sipx_zero;      /* 16 byte fill */
 };
+#endif /* __UAPI_DEF_SOCKADDR_IPX */
 
 /*
  * So we can fit the extra info for SIOCSIFADDR into the address nicely
@@ -23,12 +26,15 @@ struct sockaddr_ipx {
 #define IPX_DLTITF     0
 #define IPX_CRTITF     1
 
+#if __UAPI_DEF_IPX_ROUTE_DEFINITION
 struct ipx_route_definition {
        __be32        ipx_network;
        __be32        ipx_router_network;
        unsigned char ipx_router_node[IPX_NODE_LEN];
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */
 
+#if __UAPI_DEF_IPX_INTERFACE_DEFINITION
 struct ipx_interface_definition {
        __be32        ipx_network;
        unsigned char ipx_device[16];
@@ -45,16 +51,20 @@ struct ipx_interface_definition {
 #define IPX_INTERNAL           2
        unsigned char ipx_node[IPX_NODE_LEN];
 };
-       
+#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */
+
+#if __UAPI_DEF_IPX_CONFIG_DATA
 struct ipx_config_data {
        unsigned char   ipxcfg_auto_select_primary;
        unsigned char   ipxcfg_auto_create_interfaces;
 };
+#endif /* __UAPI_DEF_IPX_CONFIG_DATA */
 
 /*
  * OLD Route Definition for backward compatibility.
  */
 
+#if __UAPI_DEF_IPX_ROUTE_DEF
 struct ipx_route_def {
        __be32          ipx_network;
        __be32          ipx_router_network;
@@ -67,6 +77,7 @@ struct ipx_route_def {
 #define IPX_RT_BLUEBOOK                2
 #define IPX_RT_ROUTED          1
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEF */
 
 #define SIOCAIPXITFCRT         (SIOCPROTOPRIVATE)
 #define SIOCAIPXPRISLT         (SIOCPROTOPRIVATE + 1)
index e4f048e..44b8a6b 100644 (file)
 
 #endif /* _NETINET_IN_H */
 
+/* Coordinate with glibc netipx/ipx.h header. */
+#if defined(__NETIPX_IPX_H)
+
+#define __UAPI_DEF_SOCKADDR_IPX                        0
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                0
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    0
+#define __UAPI_DEF_IPX_CONFIG_DATA             0
+#define __UAPI_DEF_IPX_ROUTE_DEF               0
+
+#else /* defined(__NETIPX_IPX_H) */
+
+#define __UAPI_DEF_SOCKADDR_IPX                        1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    1
+#define __UAPI_DEF_IPX_CONFIG_DATA             1
+#define __UAPI_DEF_IPX_ROUTE_DEF               1
+
+#endif /* defined(__NETIPX_IPX_H) */
+
 /* Definitions for xattr.h */
 #if defined(_SYS_XATTR_H)
 #define __UAPI_DEF_XATTR               0
 #define __UAPI_DEF_IN6_PKTINFO         1
 #define __UAPI_DEF_IP6_MTUINFO         1
 
+/* Definitions for ipx.h */
+#define __UAPI_DEF_SOCKADDR_IPX                        1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION                1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION    1
+#define __UAPI_DEF_IPX_CONFIG_DATA             1
+#define __UAPI_DEF_IPX_ROUTE_DEF               1
+
 /* Definitions for xattr.h */
 #define __UAPI_DEF_XATTR               1
 
index c674ba2..28ce01d 100644 (file)
@@ -723,6 +723,26 @@ enum nft_meta_keys {
        NFT_META_PRANDOM,
 };
 
+/**
+ * enum nft_hash_attributes - nf_tables hash expression netlink attributes
+ *
+ * @NFTA_HASH_SREG: source register (NLA_U32)
+ * @NFTA_HASH_DREG: destination register (NLA_U32)
+ * @NFTA_HASH_LEN: source data length (NLA_U32)
+ * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
+ * @NFTA_HASH_SEED: seed value (NLA_U32)
+ */
+enum nft_hash_attributes {
+       NFTA_HASH_UNSPEC,
+       NFTA_HASH_SREG,
+       NFTA_HASH_DREG,
+       NFTA_HASH_LEN,
+       NFTA_HASH_MODULUS,
+       NFTA_HASH_SEED,
+       __NFTA_HASH_MAX,
+};
+#define NFTA_HASH_MAX  (__NFTA_HASH_MAX - 1)
+
 /**
  * enum nft_meta_attributes - nf_tables meta expression netlink attributes
  *
@@ -880,6 +900,25 @@ enum nft_queue_attributes {
 #define NFT_QUEUE_FLAG_CPU_FANOUT      0x02 /* use current CPU (no hashing) */
 #define NFT_QUEUE_FLAG_MASK            0x03
 
+enum nft_quota_flags {
+       NFT_QUOTA_F_INV         = (1 << 0),
+};
+
+/**
+ * enum nft_quota_attributes - nf_tables quota expression netlink attributes
+ *
+ * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U64)
+ * @NFTA_QUOTA_FLAGS: flags (NLA_U32)
+ */
+enum nft_quota_attributes {
+       NFTA_QUOTA_UNSPEC,
+       NFTA_QUOTA_BYTES,
+       NFTA_QUOTA_FLAGS,
+       NFTA_QUOTA_PAD,
+       __NFTA_QUOTA_MAX
+};
+#define NFTA_QUOTA_MAX         (__NFTA_QUOTA_MAX - 1)
+
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
  *
@@ -1051,7 +1090,7 @@ enum nft_gen_attributes {
  * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
  * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
  */
-enum nft_trace_attibutes {
+enum nft_trace_attributes {
        NFTA_TRACE_UNSPEC,
        NFTA_TRACE_TABLE,
        NFTA_TRACE_CHAIN,
@@ -1082,4 +1121,28 @@ enum nft_trace_types {
        __NFT_TRACETYPE_MAX
 };
 #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
+
+/**
+ * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
+ *
+ * @NFTA_NG_DREG: destination register (NLA_U32)
+ * @NFTA_NG_UNTIL: upper bound (exclusive) for the generated value; the counter wraps to zero on reaching it (NLA_U32)
+ * @NFTA_NG_TYPE: operation type (NLA_U32)
+ */
+enum nft_ng_attributes {
+       NFTA_NG_UNSPEC,
+       NFTA_NG_DREG,
+       NFTA_NG_UNTIL,
+       NFTA_NG_TYPE,
+       __NFTA_NG_MAX
+};
+#define NFTA_NG_MAX    (__NFTA_NG_MAX - 1)
+
+enum nft_ng_types {
+       NFT_NG_INCREMENTAL,
+       NFT_NG_RANDOM,
+       __NFT_NG_MAX
+};
+#define NFT_NG_MAX     (__NFT_NG_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
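Not part of this header change, but illustrative of why each new enum carries a __NFTA_*_MAX sentinel: expression modules typically validate the attributes with an nla_policy sized by the corresponding *_MAX, along these lines (a sketch, not the actual module code):

    #include <net/netlink.h>

    /* Hypothetical validation table for the hash expression attributes. */
    static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
            [NFTA_HASH_SREG]    = { .type = NLA_U32 },
            [NFTA_HASH_DREG]    = { .type = NLA_U32 },
            [NFTA_HASH_LEN]     = { .type = NLA_U32 },
            [NFTA_HASH_MODULUS] = { .type = NLA_U32 },
            [NFTA_HASH_SEED]    = { .type = NLA_U32 },
    };
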
index d95a301..59ed399 100644 (file)
@@ -583,7 +583,7 @@ enum ovs_userspace_attr {
 #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
 
 struct ovs_action_trunc {
-       uint32_t max_len; /* Max packet size in bytes. */
+       __u32 max_len; /* Max packet size in bytes. */
 };
 
 /**
@@ -605,13 +605,13 @@ struct ovs_action_push_mpls {
  * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
  * (but it will not be set in the 802.1Q header that is pushed).
  *
- * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
- * values are those that the kernel module also parses as 802.1Q headers, to
- * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
- * from having surprising results.
+ * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
+ * The only acceptable TPID values are those that the kernel module also parses
+ * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
+ * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
  */
 struct ovs_action_push_vlan {
-       __be16 vlan_tpid;       /* 802.1Q TPID. */
+       __be16 vlan_tpid;       /* 802.1Q or 802.1ad TPID. */
        __be16 vlan_tci;        /* 802.1Q TCI (VLAN ID and priority). */
 };
 
@@ -632,8 +632,8 @@ enum ovs_hash_alg {
  * @hash_basis: basis used for computing hash.
  */
 struct ovs_action_hash {
-       uint32_t  hash_alg;     /* One of ovs_hash_alg. */
-       uint32_t  hash_basis;
+       __u32  hash_alg;     /* One of ovs_hash_alg. */
+       __u32  hash_basis;
 };
 
 /**
@@ -721,9 +721,10 @@ enum ovs_nat_attr {
  * is copied from the value to the packet header field, rest of the bits are
  * left unchanged.  The non-masked value bits must be passed in as zeroes.
  * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
- * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
- * packet.
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
+ * onto the packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
+ * from the packet.
 * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
  * the nested %OVS_SAMPLE_ATTR_* attributes.
  * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
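A hypothetical action payload under the relaxed TPID rule: an 802.1ad outer (QinQ) tag, with the CFI/DEI bit set in the TCI as the comment above requires. A fragment, not a complete program:

    #include <arpa/inet.h>
    #include <linux/if_ether.h>
    #include <linux/openvswitch.h>

    struct ovs_action_push_vlan push = {
            .vlan_tpid = htons(ETH_P_8021AD),  /* outer tag, now accepted */
            .vlan_tci  = htons(0x1000 | 100),  /* CFI bit | VID 100 */
    };
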
index d1c1cca..8915b61 100644 (file)
@@ -428,6 +428,24 @@ enum {
        TCA_FLOWER_KEY_UDP_DST,         /* be16 */
 
        TCA_FLOWER_FLAGS,
+       TCA_FLOWER_KEY_VLAN_ID,         /* be16 */
+       TCA_FLOWER_KEY_VLAN_PRIO,       /* u8   */
+       TCA_FLOWER_KEY_VLAN_ETH_TYPE,   /* be16 */
+
+       TCA_FLOWER_KEY_ENC_KEY_ID,      /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_SRC,    /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_DST,    /* be32 */
+       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+       TCA_FLOWER_KEY_ENC_IPV6_SRC,    /* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_DST,    /* struct in6_addr */
+       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
+       TCA_FLOWER_KEY_TCP_SRC_MASK,    /* be16 */
+       TCA_FLOWER_KEY_TCP_DST_MASK,    /* be16 */
+       TCA_FLOWER_KEY_UDP_SRC_MASK,    /* be16 */
+       TCA_FLOWER_KEY_UDP_DST_MASK,    /* be16 */
        __TCA_FLOWER_MAX,
 };
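The new *_MASK attributes pair one-to-one with the existing key attributes. A value/mask pair such as this hypothetical one matches TCP source ports 8000-8015 in a single filter:

    #include <arpa/inet.h>
    #include <linux/types.h>

    /* Fragment: 8000 == 0x1f40, and mask 0xfff0 covers 0x1f40..0x1f4f. */
    __be16 tcp_src      = htons(8000);     /* TCA_FLOWER_KEY_TCP_SRC */
    __be16 tcp_src_mask = htons(0xfff0);   /* TCA_FLOWER_KEY_TCP_SRC_MASK */
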
 
index 25a9ad8..e7a31f8 100644 (file)
@@ -235,6 +235,7 @@ enum
        LINUX_MIB_TCPSPURIOUSRTOS,              /* TCPSpuriousRTOs */
        LINUX_MIB_TCPMD5NOTFOUND,               /* TCPMD5NotFound */
        LINUX_MIB_TCPMD5UNEXPECTED,             /* TCPMD5Unexpected */
+       LINUX_MIB_TCPMD5FAILURE,                /* TCPMD5Failure */
        LINUX_MIB_SACKSHIFTED,
        LINUX_MIB_SACKMERGED,
        LINUX_MIB_SACKSHIFTFALLBACK,
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
new file mode 100644 (file)
index 0000000..10fc07d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_SKBMOD_H
+#define __LINUX_TC_SKBMOD_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBMOD 15
+
+#define SKBMOD_F_DMAC  0x1
+#define SKBMOD_F_SMAC  0x2
+#define SKBMOD_F_ETYPE 0x4
+#define SKBMOD_F_SWAPMAC 0x8
+
+struct tc_skbmod {
+       tc_gen;
+       __u64 flags;
+};
+
+enum {
+       TCA_SKBMOD_UNSPEC,
+       TCA_SKBMOD_TM,
+       TCA_SKBMOD_PARMS,
+       TCA_SKBMOD_DMAC,
+       TCA_SKBMOD_SMAC,
+       TCA_SKBMOD_ETYPE,
+       TCA_SKBMOD_PAD,
+       __TCA_SKBMOD_MAX
+};
+#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1)
+
+#endif
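A hedged sketch of a parameter block for the new action; tc_gen supplies the common index/action fields, and the replacement MAC itself travels in the separate TCA_SKBMOD_DMAC attribute:

    struct tc_skbmod parm = {
            .action = TC_ACT_PIPE,    /* continue down the action chain */
            .flags  = SKBMOD_F_DMAC,  /* rewrite the destination MAC */
    };
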
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
new file mode 100644 (file)
index 0000000..890106f
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_TUNNEL_KEY_H
+#define __LINUX_TC_TUNNEL_KEY_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_TUNNEL_KEY 17
+
+#define TCA_TUNNEL_KEY_ACT_SET     1
+#define TCA_TUNNEL_KEY_ACT_RELEASE  2
+
+struct tc_tunnel_key {
+       tc_gen;
+       int t_action;
+};
+
+enum {
+       TCA_TUNNEL_KEY_UNSPEC,
+       TCA_TUNNEL_KEY_TM,
+       TCA_TUNNEL_KEY_PARMS,
+       TCA_TUNNEL_KEY_ENC_IPV4_SRC,    /* be32 */
+       TCA_TUNNEL_KEY_ENC_IPV4_DST,    /* be32 */
+       TCA_TUNNEL_KEY_ENC_IPV6_SRC,    /* struct in6_addr */
+       TCA_TUNNEL_KEY_ENC_IPV6_DST,    /* struct in6_addr */
+       TCA_TUNNEL_KEY_ENC_KEY_ID,      /* be64 */
+       TCA_TUNNEL_KEY_PAD,
+       __TCA_TUNNEL_KEY_MAX,
+};
+
+#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1)
+
+#endif
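Likewise for tunnel_key, a hypothetical SET parameter block; the tunnel endpoints and key ID are carried by the sibling TCA_TUNNEL_KEY_ENC_* attributes rather than in the struct itself:

    struct tc_tunnel_key parm = {
            .action   = TC_ACT_PIPE,
            .t_action = TCA_TUNNEL_KEY_ACT_SET,  /* attach tunnel metadata */
    };
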
index 31151ff..be72b6e 100644 (file)
@@ -29,6 +29,7 @@ enum {
        TCA_VLAN_PUSH_VLAN_ID,
        TCA_VLAN_PUSH_VLAN_PROTOCOL,
        TCA_VLAN_PAD,
+       TCA_VLAN_PUSH_VLAN_PRIORITY,
        __TCA_VLAN_MAX,
 };
 #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
index 5f3f6d0..f9edd20 100644 (file)
@@ -59,6 +59,9 @@ enum {
        TIPC_NL_MON_SET,
        TIPC_NL_MON_GET,
        TIPC_NL_MON_PEER_GET,
+       TIPC_NL_PEER_REMOVE,
+       TIPC_NL_BEARER_ADD,
+       TIPC_NL_UDP_GET_REMOTEIP,
 
        __TIPC_NL_CMD_MAX,
        TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
@@ -98,6 +101,7 @@ enum {
        TIPC_NLA_UDP_UNSPEC,
        TIPC_NLA_UDP_LOCAL,             /* sockaddr_storage */
        TIPC_NLA_UDP_REMOTE,            /* sockaddr_storage */
+       TIPC_NLA_UDP_MULTI_REMOTEIP,    /* flag */
 
        __TIPC_NLA_UDP_MAX,
        TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
index 9a37c54..b5486e6 100644 (file)
@@ -9,8 +9,8 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
-DECLARE_PER_CPU(int, xen_vcpu_id);
-static inline int xen_vcpu_nr(int cpu)
+DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
+static inline uint32_t xen_vcpu_nr(int cpu)
 {
        return per_cpu(xen_vcpu_id, cpu);
 }
index d6709eb..0d302a8 100644 (file)
@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
        unsigned long ino;
        dev_t dev;
 
-       rcu_read_lock();
-       exe_file = rcu_dereference(tsk->mm->exe_file);
+       exe_file = get_task_exe_file(tsk);
+       if (!exe_file)
+               return 0;
        ino = exe_file->f_inode->i_ino;
        dev = exe_file->f_inode->i_sb->s_dev;
-       rcu_read_unlock();
+       fput(exe_file);
        return audit_mark_compare(mark, ino, dev);
 }
index 03fd23d..7b7baae 100644 (file)
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
        prandom_init_once(&bpf_user_rnd_state);
 }
 
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
 {
        /* Should someone ever have the rather unwise idea to use some
         * of the registers passed into this function, then note that
index 1ea3afb..a5b8bf8 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ktime.h>
 #include <linux/sched.h>
 #include <linux/uidgid.h>
+#include <linux/filter.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
  * if program is allowed to access maps, so check rcu_read_lock_held in
  * all three functions.
  */
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 {
-       /* verifier checked that R1 contains a valid pointer to bpf_map
-        * and R2 points to a program stack and map->key_size bytes were
-        * initialized
-        */
-       struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-       void *key = (void *) (unsigned long) r2;
-       void *value;
-
        WARN_ON_ONCE(!rcu_read_lock_held());
-
-       value = map->ops->map_lookup_elem(map, key);
-
-       /* lookup() returns either pointer to element value or NULL
-        * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
-        */
-       return (unsigned long) value;
+       return (unsigned long) map->ops->map_lookup_elem(map, key);
 }
 
 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
@@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
        .arg2_type      = ARG_PTR_TO_MAP_KEY,
 };
 
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+          void *, value, u64, flags)
 {
-       struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-       void *key = (void *) (unsigned long) r2;
-       void *value = (void *) (unsigned long) r3;
-
        WARN_ON_ONCE(!rcu_read_lock_held());
-
-       return map->ops->map_update_elem(map, key, value, r4);
+       return map->ops->map_update_elem(map, key, value, flags);
 }
 
 const struct bpf_func_proto bpf_map_update_elem_proto = {
@@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 {
-       struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-       void *key = (void *) (unsigned long) r2;
-
        WARN_ON_ONCE(!rcu_read_lock_held());
-
        return map->ops->map_delete_elem(map, key);
 }
 
@@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
 {
        return smp_processor_id();
 }
@@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
 {
        /* NMI safe access to clock monotonic */
        return ktime_get_mono_fast_ns();
@@ -122,11 +101,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
 {
        struct task_struct *task = current;
 
-       if (!task)
+       if (unlikely(!task))
                return -EINVAL;
 
        return (u64) task->tgid << 32 | task->pid;
@@ -138,18 +117,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
 {
        struct task_struct *task = current;
        kuid_t uid;
        kgid_t gid;
 
-       if (!task)
+       if (unlikely(!task))
                return -EINVAL;
 
        current_uid_gid(&uid, &gid);
        return (u64) from_kgid(&init_user_ns, gid) << 32 |
-               from_kuid(&init_user_ns, uid);
+                    from_kuid(&init_user_ns, uid);
 }
 
 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
@@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
 {
        struct task_struct *task = current;
-       char *buf = (char *) (long) r1;
 
        if (unlikely(!task))
                goto err_clear;
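For readers new to the macros: BPF_CALL_n keeps the five-u64 register calling convention at the exported symbol while giving the body typed arguments, generating the casts the old helpers open-coded. Roughly, as a simplified sketch (the real macros live in include/linux/filter.h):

    /* Approximate expansion of BPF_CALL_2(bpf_map_delete_elem,
     * struct bpf_map *, map, void *, key): a typed body plus a trampoline. */
    static u64 ____bpf_map_delete_elem(struct bpf_map *map, void *key);

    static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
    {
            return ____bpf_map_delete_elem((struct bpf_map *)(unsigned long)r1,
                                           (void *)(unsigned long)r2);
    }
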
index bf4495f..732ae16 100644 (file)
@@ -116,10 +116,9 @@ free_smap:
        return ERR_PTR(err);
 }
 
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+          u64, flags)
 {
-       struct pt_regs *regs = (struct pt_regs *) (long) r1;
-       struct bpf_map *map = (struct bpf_map *) (long) r2;
        struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
        struct perf_callchain_entry *trace;
        struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
index abb61f3..90493a6 100644 (file)
@@ -1637,21 +1637,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
        return 0;
 }
 
-static void find_good_pkt_pointers(struct verifier_env *env,
-                                  struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct verifier_state *state,
+                                  const struct reg_state *dst_reg)
 {
-       struct verifier_state *state = &env->cur_state;
        struct reg_state *regs = state->regs, *reg;
        int i;
-       /* r2 = r3;
-        * r2 += 8
-        * if (r2 > pkt_end) goto somewhere
-        * r2 == dst_reg, pkt_end == src_reg,
-        * r2=pkt(id=n,off=8,r=0)
-        * r3=pkt(id=n,off=0,r=0)
-        * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-        * so that range of bytes [r3, r3 + 8) is safe to access
+
+       /* LLVM can generate two kinds of checks:
+        *
+        * Type 1:
+        *
+        *   r2 = r3;
+        *   r2 += 8;
+        *   if (r2 > pkt_end) goto <handle exception>
+        *   <access okay>
+        *
+        *   Where:
+        *     r2 == dst_reg, pkt_end == src_reg
+        *     r2=pkt(id=n,off=8,r=0)
+        *     r3=pkt(id=n,off=0,r=0)
+        *
+        * Type 2:
+        *
+        *   r2 = r3;
+        *   r2 += 8;
+        *   if (pkt_end >= r2) goto <access okay>
+        *   <handle exception>
+        *
+        *   Where:
+        *     pkt_end == dst_reg, r2 == src_reg
+        *     r2=pkt(id=n,off=8,r=0)
+        *     r3=pkt(id=n,off=0,r=0)
+        *
+        * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+        * so that range of bytes [r3, r3 + 8) is safe to access.
         */
+
        for (i = 0; i < MAX_BPF_REG; i++)
                if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
                        regs[i].range = dst_reg->off;
@@ -1668,8 +1689,8 @@ static void find_good_pkt_pointers(struct verifier_env *env,
 static int check_cond_jmp_op(struct verifier_env *env,
                             struct bpf_insn *insn, int *insn_idx)
 {
-       struct reg_state *regs = env->cur_state.regs, *dst_reg;
-       struct verifier_state *other_branch;
+       struct verifier_state *other_branch, *this_branch = &env->cur_state;
+       struct reg_state *regs = this_branch->regs, *dst_reg;
        u8 opcode = BPF_OP(insn->code);
        int err;
 
@@ -1750,13 +1771,17 @@ static int check_cond_jmp_op(struct verifier_env *env,
        } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
                   dst_reg->type == PTR_TO_PACKET &&
                   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-               find_good_pkt_pointers(env, dst_reg);
+               find_good_pkt_pointers(this_branch, dst_reg);
+       } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+                  dst_reg->type == PTR_TO_PACKET_END &&
+                  regs[insn->src_reg].type == PTR_TO_PACKET) {
+               find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
        } else if (is_pointer_value(env, insn->dst_reg)) {
                verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
                return -EACCES;
        }
        if (log_level)
-               print_verifier_state(&env->cur_state);
+               print_verifier_state(this_branch);
        return 0;
 }
 
@@ -2333,7 +2358,8 @@ static int do_check(struct verifier_env *env)
                        if (err)
                                return err;
 
-                       if (BPF_SIZE(insn->code) != BPF_W) {
+                       if (BPF_SIZE(insn->code) != BPF_W &&
+                           BPF_SIZE(insn->code) != BPF_DW) {
                                insn_idx++;
                                continue;
                        }
@@ -2510,6 +2536,20 @@ process_bpf_exit:
        return 0;
 }
 
+static int check_map_prog_compatibility(struct bpf_map *map,
+                                       struct bpf_prog *prog)
+
+{
+       if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+           (map->map_type == BPF_MAP_TYPE_HASH ||
+            map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
+           (map->map_flags & BPF_F_NO_PREALLOC)) {
+               verbose("perf_event programs can only use preallocated hash map\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
@@ -2517,7 +2557,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 {
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
-       int i, j;
+       int i, j, err;
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                if (BPF_CLASS(insn->code) == BPF_LDX &&
@@ -2561,6 +2601,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
                                return PTR_ERR(map);
                        }
 
+                       err = check_map_prog_compatibility(map, env->prog);
+                       if (err) {
+                               fdput(f);
+                               return err;
+                       }
+
                        /* store map pointer inside BPF_LD_IMM64 instruction */
                        insn[0].imm = (u32) (unsigned long) map;
                        insn[1].imm = ((u64) (unsigned long) map) >> 32;
@@ -2642,9 +2688,11 @@ static int convert_ctx_accesses(struct verifier_env *env)
        for (i = 0; i < insn_cnt; i++, insn++) {
                u32 insn_delta, cnt;
 
-               if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+               if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+                   insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
                        type = BPF_READ;
-               else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+               else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+                        insn->code == (BPF_STX | BPF_MEM | BPF_DW))
                        type = BPF_WRITE;
                else
                        continue;
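The compatibility check added above rejects, for instance, a map created with attributes like these when it is referenced from a BPF_PROG_TYPE_PERF_EVENT program (values illustrative); such programs run in NMI context, where on-demand element allocation is unsafe:

    #include <linux/bpf.h>

    union bpf_attr attr = {
            .map_type    = BPF_MAP_TYPE_HASH,
            .key_size    = sizeof(__u32),
            .value_size  = sizeof(__u64),
            .max_entries = 1024,
            .map_flags   = BPF_F_NO_PREALLOC,  /* now -EINVAL for perf_event progs */
    };
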
index c2de56a..7fa0c4a 100644 (file)
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
 CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
 CONFIG_SLOB=y
index c7fd277..c27e533 100644 (file)
@@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
        mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Make sure the new task conforms to the current state of its parent,
+ * which could have been changed by cpuset just after the child inherited
+ * the state from the parent and before it sits on the cgroup's task list.
+ */
+void cpuset_fork(struct task_struct *task)
+{
+       if (task_css_is_root(task, cpuset_cgrp_id))
+               return;
+
+       set_cpus_allowed_ptr(task, &current->cpus_allowed);
+       task->mems_allowed = current->mems_allowed;
+}
+
 struct cgroup_subsys cpuset_cgrp_subsys = {
        .css_alloc      = cpuset_css_alloc,
        .css_online     = cpuset_css_online,
@@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
        .attach         = cpuset_attach,
        .post_attach    = cpuset_post_attach,
        .bind           = cpuset_bind,
+       .fork           = cpuset_fork,
        .legacy_cftypes = files,
        .early_init     = true,
 };
index 1903b8f..a7b8c1c 100644 (file)
@@ -242,18 +242,6 @@ unlock:
        return ret;
 }
 
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-       struct event_function_struct efs = {
-               .event = event,
-               .func = func,
-               .data = data,
-       };
-
-       int ret = event_function(&efs);
-       WARN_ON_ONCE(ret);
-}
-
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
        struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ again:
        raw_spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct task_struct *task = READ_ONCE(ctx->task);
+       struct perf_event_context *task_ctx = NULL;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       if (task) {
+               if (task == TASK_TOMBSTONE)
+                       return;
+
+               task_ctx = ctx;
+       }
+
+       perf_ctx_lock(cpuctx, task_ctx);
+
+       task = ctx->task;
+       if (task == TASK_TOMBSTONE)
+               goto unlock;
+
+       if (task) {
+               /*
+                * We must be either inactive or active and the right task,
+                * otherwise we're screwed, since we cannot IPI to somewhere
+                * else.
+                */
+               if (ctx->is_active) {
+                       if (WARN_ON_ONCE(task != current))
+                               goto unlock;
+
+                       if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+                               goto unlock;
+               }
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       func(event, cpuctx, ctx, data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                       PERF_FLAG_FD_OUTPUT  |\
                       PERF_FLAG_PID_CGROUP |\
@@ -3513,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .group = group,
                        .ret = 0,
                };
-               smp_call_function_single(event->oncpu,
-                                        __perf_event_read, &data, 1);
-               ret = data.ret;
+               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+               /* The event must have been read from an online CPU: */
+               WARN_ON_ONCE(ret);
+               ret = ret ? : data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
@@ -6129,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info)
 {
        struct perf_event *event = info;
        struct pmu *pmu = event->pmu;
-       struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
        struct remote_output ro = {
                .rb     = event->rb,
        };
@@ -6583,15 +6620,6 @@ got_name:
        kfree(buf);
 }
 
-/*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-       return filter->filter && filter->inode;
-}
-
 /*
  * Check whether inode and address range match filter criteria.
  */
@@ -6653,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
        struct perf_event_context *ctx;
        int ctxn;
 
+       /*
+        * Data tracing isn't supported yet and as such there is no need
+        * to keep track of anything that isn't related to executable code:
+        */
+       if (!(vma->vm_flags & VM_EXEC))
+               return;
+
        rcu_read_lock();
        for_each_task_context_nr(ctxn) {
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -6987,7 +7022,7 @@ static int __perf_event_overflow(struct perf_event *event,
                irq_work_queue(&event->pending);
        }
 
-       event->overflow_handler(event, data, regs);
+       READ_ONCE(event->overflow_handler)(event, data, regs);
 
        if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
@@ -7602,11 +7637,83 @@ static void perf_event_free_filter(struct perf_event *event)
        ftrace_profile_free_filter(event);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+                                struct perf_sample_data *data,
+                                struct pt_regs *regs)
+{
+       struct bpf_perf_event_data_kern ctx = {
+               .data = data,
+               .regs = regs,
+       };
+       int ret = 0;
+
+       preempt_disable();
+       if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+               goto out;
+       rcu_read_lock();
+       ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+       rcu_read_unlock();
+out:
+       __this_cpu_dec(bpf_prog_active);
+       preempt_enable();
+       if (!ret)
+               return;
+
+       event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+       struct bpf_prog *prog;
+
+       if (event->overflow_handler_context)
+               /* hw breakpoint or kernel counter */
+               return -EINVAL;
+
+       if (event->prog)
+               return -EEXIST;
+
+       prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       event->prog = prog;
+       event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+       WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+       return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+       struct bpf_prog *prog = event->prog;
+
+       if (!prog)
+               return;
+
+       WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+       event->prog = NULL;
+       bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+       return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
        bool is_kprobe, is_tracepoint;
        struct bpf_prog *prog;
 
+       if (event->attr.type == PERF_TYPE_HARDWARE ||
+           event->attr.type == PERF_TYPE_SOFTWARE)
+               return perf_event_set_bpf_handler(event, prog_fd);
+
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -EINVAL;
 
@@ -7647,6 +7754,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 {
        struct bpf_prog *prog;
 
+       perf_event_free_bpf_handler(event);
+
        if (!event->tp_event)
                return;
 
@@ -7805,7 +7914,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
        list_for_each_entry(filter, &ifh->list, entry) {
                event->addr_filters_offs[count] = 0;
 
-               if (perf_addr_filter_needs_mmap(filter))
+               /*
+                * Adjust base offset if the filter is associated with a binary
+                * that needs to be mapped:
+                */
+               if (filter->inode)
                        event->addr_filters_offs[count] =
                                perf_addr_filter_apply(filter, mm);
 
@@ -7936,8 +8049,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
                                        goto fail;
                        }
 
-                       if (token == IF_SRC_FILE) {
-                               filename = match_strdup(&args[2]);
+                       if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+                               int fpos = filter->range ? 2 : 1;
+
+                               filename = match_strdup(&args[fpos]);
                                if (!filename) {
                                        ret = -ENOMEM;
                                        goto fail;
@@ -8957,6 +9072,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        if (!overflow_handler && parent_event) {
                overflow_handler = parent_event->overflow_handler;
                context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+               if (overflow_handler == bpf_overflow_handler) {
+                       struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+                       if (IS_ERR(prog)) {
+                               err = PTR_ERR(prog);
+                               goto err_ns;
+                       }
+                       event->prog = prog;
+                       event->orig_overflow_handler =
+                               parent_event->orig_overflow_handler;
+               }
+#endif
        }
 
        if (overflow_handler) {
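From userspace, the new handler path is reached through the existing PERF_EVENT_IOC_SET_BPF ioctl, now accepted for hardware and software events too. A sketch with error handling omitted; attr and bpf_prog_fd are assumed to be set up elsewhere:

    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/perf_event.h>

    int event_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

    ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd);  /* prog runs on overflow */
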
index b7a525a..8c50276 100644 (file)
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        err = -EAGAIN;
        ptep = page_check_address(page, mm, addr, &ptl, 0);
-       if (!ptep)
+       if (!ptep) {
+               mem_cgroup_cancel_charge(kpage, memcg, false);
                goto unlock;
+       }
 
        get_page(kpage);
        page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
        err = 0;
  unlock:
-       mem_cgroup_cancel_charge(kpage, memcg, false);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
        unlock_page(page);
        return err;
index 2f974ae..091a78b 100644 (file)
@@ -848,12 +848,7 @@ void do_exit(long code)
        TASKS_RCU(preempt_enable());
        exit_notify(tsk, group_dead);
        proc_exit_connector(tsk);
-#ifdef CONFIG_NUMA
-       task_lock(tsk);
-       mpol_put(tsk->mempolicy);
-       tsk->mempolicy = NULL;
-       task_unlock(tsk);
-#endif
+       mpol_put_task_policy(tsk);
 #ifdef CONFIG_FUTEX
        if (unlikely(current->pi_state_cache))
                kfree(current->pi_state_cache);
index 52e725d..beb3172 100644 (file)
@@ -798,6 +798,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mm_exe_file);
 
+/**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if the task has no mm, its mm has no associated executable
+ * file, or it is a kernel thread with a borrowed mm (see the comment above
+ * get_task_mm). The caller must release the file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+       struct file *exe_file = NULL;
+       struct mm_struct *mm;
+
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
@@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
        deactivate_mm(tsk, mm);
 
        /*
-        * If we're exiting normally, clear a user-space tid field if
-        * requested.  We leave this alone when dying by signal, to leave
-        * the value intact in a core dump, and to save the unnecessary
-        * trouble, say, a killed vfork parent shouldn't touch this mm.
-        * Userland only wants this done for a sys_exit.
+        * Signal userspace if we're not exiting with a core dump
+        * because we want to leave the value intact for debugging
+        * purposes.
         */
        if (tsk->clear_child_tid) {
-               if (!(tsk->flags & PF_SIGNALED) &&
+               if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
                    atomic_read(&mm->mm_users) > 1) {
                        /*
                         * We don't check the error code - if userspace has
@@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
-       threadgroup_change_begin(current);
        cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
 
+       threadgroup_change_begin(current);
        /*
         * Ensure that the cgroup subsystem policies allow the new process to be
         * forked. It should be noted that the new process's css_set can be changed
@@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_cancel_cgroup:
        cgroup_cancel_fork(p);
 bad_fork_free_pid:
+       threadgroup_change_end(current);
        if (pid != &init_struct_pid)
                free_pid(pid);
 bad_fork_cleanup_thread:
@@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy:
        mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-       threadgroup_change_end(current);
        delayacct_tsk_free(p);
 bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
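The caller pattern implied by the get_task_exe_file() kernel-doc above; the audit hunk earlier in this merge uses exactly this shape:

    struct file *exe_file = get_task_exe_file(task);

    if (exe_file) {
            /* ... inspect exe_file->f_inode ... */
            fput(exe_file);  /* drop the reference the helper took */
    }
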
index f689593..32f6cfc 100644 (file)
@@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
                return NULL;
        }
 
+       get_online_cpus();
        if (max_vecs >= num_online_cpus()) {
                cpumask_copy(affinity_mask, cpu_online_mask);
                *nr_vecs = num_online_cpus();
@@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
                }
                *nr_vecs = vecs;
        }
+       put_online_cpus();
 
        return affinity_mask;
 }
index b4c1bc7..6373890 100644 (file)
@@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
        desc->name = name;
 
        if (handle != handle_bad_irq && is_chained) {
+               /*
+                * We're about to start this interrupt immediately,
+                * hence the need to set the trigger configuration.
+                * But the .set_type callback may have overridden the
+                * flow handler, ignoring that we're dealing with a
+                * chained interrupt. Reset it immediately because we
+                * do know better.
+                */
+               __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
+               desc->handle_irq = handle;
+
                irq_settings_set_noprobe(desc);
                irq_settings_set_norequest(desc);
                irq_settings_set_nothread(desc);
index 73a2b78..9530fcd 100644 (file)
@@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
        action->dev_id = dev_id;
 
        retval = irq_chip_pm_get(&desc->irq_data);
-       if (retval < 0)
+       if (retval < 0) {
+               kfree(action);
                return retval;
+       }
 
        chip_bus_lock(desc);
        retval = __setup_irq(irq, desc, action);
@@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
        action->percpu_dev_id = dev_id;
 
        retval = irq_chip_pm_get(&desc->irq_data);
-       if (retval < 0)
+       if (retval < 0) {
+               kfree(action);
                return retval;
+       }
 
        chip_bus_lock(desc);
        retval = __setup_irq(irq, desc, action);
index 503bc2d..037c321 100644 (file)
@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
        return 0;
 out:
        vfree(pi->sechdrs);
+       pi->sechdrs = NULL;
+
        vfree(pi->purgatory_buf);
+       pi->purgatory_buf = NULL;
        return ret;
 }
 
index 251d16b..b501e39 100644 (file)
@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
        align_start = res->start & ~(SECTION_SIZE - 1);
        align_size = ALIGN(resource_size(res), SECTION_SIZE);
        arch_remove_memory(align_start, align_size);
+       untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
        pgmap_radix_release(res);
        dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
                        "%s: failed to free all reserved pages\n", __func__);
@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
        resource_size_t key, align_start, align_size, align_end;
+       pgprot_t pgprot = PAGE_KERNEL;
        struct dev_pagemap *pgmap;
        struct page_map *page_map;
        int error, nid, is_ram;
@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        if (nid < 0)
                nid = numa_mem_id();
 
+       error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+                       align_size);
+       if (error)
+               goto err_pfn_remap;
+
        error = arch_add_memory(nid, align_start, align_size, true);
        if (error)
                goto err_add_memory;
@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        return __va(res->start);
 
  err_add_memory:
+       untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
  err_radix:
        pgmap_radix_release(res);
        devres_free(page_map);
index 97b0df7..168ff44 100644 (file)
@@ -482,7 +482,16 @@ void pm_qos_update_request(struct pm_qos_request *req,
                return;
        }
 
-       cancel_delayed_work_sync(&req->work);
+       /*
+        * This function may be called very early during boot, for example,
+        * from of_clk_init(), where irq needs to stay disabled.
+        * cancel_delayed_work_sync() assumes that irq is enabled on
+        * invocation and re-enables it on return.  Avoid calling it until
+        * workqueue is initialized.
+        */
+       if (keventd_up())
+               cancel_delayed_work_sync(&req->work);
+
        __pm_qos_update_request(req, new_value);
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_request);
index 9a0178c..b022284 100644 (file)
@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
-       bm->cur.node = list_entry(bm->cur.node->list.next,
-                                 struct rtree_node, list);
-       if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+       if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+               bm->cur.node = list_entry(bm->cur.node->list.next,
+                                         struct rtree_node, list);
                bm->cur.node_pfn += BM_BITS_PER_BLOCK;
                bm->cur.node_bit  = 0;
                touch_softlockup_watchdog();
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
        }
 
        /* No more nodes, goto next zone */
-       bm->cur.zone = list_entry(bm->cur.zone->list.next,
+       if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+               bm->cur.zone = list_entry(bm->cur.zone->list.next,
                                  struct mem_zone_bm_rtree, list);
-       if (&bm->cur.zone->list != &bm->zones) {
                bm->cur.node = list_entry(bm->cur.zone->leaves.next,
                                          struct rtree_node, list);
                bm->cur.node_pfn = 0;
index 276762f..d5760c4 100644 (file)
@@ -9,10 +9,10 @@
 
 char *_braille_console_setup(char **str, char **brl_options)
 {
-       if (!memcmp(*str, "brl,", 4)) {
+       if (!strncmp(*str, "brl,", 4)) {
                *brl_options = "";
                *str += 4;
-       } else if (!memcmp(str, "brl=", 4)) {
+       } else if (!strncmp(*str, "brl=", 4)) {
                *brl_options = *str + 4;
                *str = strchr(*brl_options, ',');
                if (!*str)
index b69eb8a..16bab47 100644 (file)
@@ -99,26 +99,32 @@ again:
        return add;
 }
 
-/*
- * printk one line from the temporary buffer from @start index until
- * and including the @end index.
- */
-static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
+static void printk_nmi_flush_line(const char *text, int len)
 {
-       const char *buf = s->buffer + start;
-
        /*
         * The buffers are flushed in NMI only on panic.  The messages must
         * go only into the ring buffer at this stage.  Consoles will get
         * explicitly called later when a crashdump is not generated.
         */
        if (in_nmi())
-               printk_deferred("%.*s", (end - start) + 1, buf);
+               printk_deferred("%.*s", len, text);
        else
-               printk("%.*s", (end - start) + 1, buf);
+               printk("%.*s", len, text);
 
 }
 
+/*
+ * printk one line from the temporary buffer, from the @start index up
+ * to and including the @end index.
+ */
+static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
+                                       int start, int end)
+{
+       const char *buf = s->buffer + start;
+
+       printk_nmi_flush_line(buf, (end - start) + 1);
+}
+
 /*
  * Flush data from the associated per_CPU buffer. The function
  * can be called either via IRQ work or independently.
@@ -150,9 +156,11 @@ more:
         * the buffer an unexpected way. If we printed something then
         * @len must only increase.
         */
-       if (i && i >= len)
-               pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
-                      i, len);
+       if (i && i >= len) {
+               const char *msg = "printk_nmi_flush: internal error\n";
+
+               printk_nmi_flush_line(msg, strlen(msg));
+       }
 
        if (!len)
                goto out; /* Someone else has already flushed the buffer. */
@@ -166,14 +174,14 @@ more:
        /* Print line by line. */
        for (; i < size; i++) {
                if (s->buffer[i] == '\n') {
-                       print_nmi_seq_line(s, last_i, i);
+                       printk_nmi_flush_seq_line(s, last_i, i);
                        last_i = i + 1;
                }
        }
        /* Check if there was a partial line. */
        if (last_i < size) {
-               print_nmi_seq_line(s, last_i, size - 1);
-               pr_cont("\n");
+               printk_nmi_flush_seq_line(s, last_i, size - 1);
+               printk_nmi_flush_line("\n", strlen("\n"));
        }
 
        /*
index 9858266..a846cf8 100644 (file)
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
+/*
+ * When a guest is interrupted for a long time, missed clock ticks are
+ * not redelivered later. Because of that, this function may on occasion
+ * account more time than the calling functions think has elapsed.
+ */
 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
-       other = account_other_time(cputime);
+       other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;
        cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
        }
 
        cputime = cputime_one_jiffy;
-       steal = steal_account_process_time(cputime);
+       steal = steal_account_process_time(ULONG_MAX);
 
        if (steal >= cputime)
                return;
@@ -516,7 +521,7 @@ void account_idle_ticks(unsigned long ticks)
        }
 
        cputime = jiffies_to_cputime(ticks);
-       steal = steal_account_process_time(cputime);
+       steal = steal_account_process_time(ULONG_MAX);
 
        if (steal >= cputime)
                return;
@@ -614,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
        stime = curr->stime;
        utime = curr->utime;
 
-       if (utime == 0) {
-               stime = rtime;
+       /*
+        * If either stime or both stime and utime are 0, assume all runtime is
+        * userspace. Once a task gets some ticks, the monotonicity code at
+        * 'update' will ensure things converge to the observed ratio.
+        */
+       if (stime == 0) {
+               utime = rtime;
                goto update;
        }
 
-       if (stime == 0) {
-               utime = rtime;
+       if (utime == 0) {
+               stime = rtime;
                goto update;
        }
 
        stime = scale_stime((__force u64)stime, (__force u64)rtime,
                            (__force u64)(stime + utime));
 
+update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
@@ -649,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
                stime = rtime - utime;
        }
 
-update:
        prev->stime = stime;
        prev->utime = utime;
 out:
@@ -694,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
        unsigned long now = READ_ONCE(jiffies);
        cputime_t delta, other;
 
+       /*
+        * Unlike tick-based timing, vtime-based timing never has lost
+        * ticks, so there is no need for steal time accounting to make
+        * up for them. Vtime accounts a rounded version of the actual
+        * elapsed time. Limit account_other_time to prevent rounding
+        * errors from causing elapsed vtime to go negative.
+        */
        delta = jiffies_to_cputime(now - tsk->vtime_snap);
        other = account_other_time(delta);
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
index ef6c6c3..0db7c8a 100644 (file)
@@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
                ptrace_event(PTRACE_EVENT_SECCOMP, data);
                /*
                 * The delivery of a fatal signal during event
-                * notification may silently skip tracer notification.
-                * Terminating the task now avoids executing a system
-                * call that may not be intended.
+                * notification may silently skip tracer notification,
+                * which could leave us with a potentially unmodified
+                * syscall that the tracer would have liked to change.
+                * Since the process is about to die, we just
+                * force the syscall to be skipped and let the signal
+                * kill the process and correctly handle any tracer exit
+                * notifications.
                 */
                if (fatal_signal_pending(current))
-                       do_exit(SIGSYS);
+                       goto skip;
                /* Check if the tracer forced the syscall to be skipped. */
                this_syscall = syscall_get_nr(current, task_pt_regs(current));
                if (this_syscall < 0)
index b43d0b2..a13bbda 100644 (file)
@@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
        return 0;
 }
 
+static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
+                                int *valp,
+                                int write, void *data)
+{
+       if (write) {
+               if (*negp)
+                       return -EINVAL;
+               *valp = *lvalp;
+       } else {
+               unsigned int val = *valp;
+               *lvalp = (unsigned long)val;
+       }
+       return 0;
+}
+
 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 int proc_dointvec(struct ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,buffer,lenp,ppos,
-                           NULL,NULL);
+       return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
+}
+
+/**
+ * proc_douintvec - read a vector of unsigned integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec(struct ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
+                               do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write,
        return -ENOSYS;
 }
 
+int proc_douintvec(struct ctl_table *table, int write,
+                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return -ENOSYS;
+}
+
 int proc_dointvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
  * exception granted :-)
  */
 EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
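A sketch of how a hypothetical caller would wire up the new handler (the variable and table names are made up); the only behavioural difference from proc_dointvec is that do_proc_douintvec_conv() rejects a leading minus sign with -EINVAL instead of storing a negative value:

    static unsigned int example_limit;              /* hypothetical knob */

    static struct ctl_table example_table[] = {
            {
                    .procname     = "example_limit",
                    .data         = &example_limit,
                    .maxlen       = sizeof(unsigned int),
                    .mode         = 0644,
                    .proc_handler = proc_douintvec, /* was proc_dointvec */
            },
            { }
    };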
index 204fdc8..2ec7c00 100644 (file)
@@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
        ktime_t now, expires;
        int cpu = smp_processor_id();
 
+       now = tick_nohz_start_idle(ts);
+
        if (can_stop_idle_tick(cpu, ts)) {
                int was_stopped = ts->tick_stopped;
 
-               now = tick_nohz_start_idle(ts);
                ts->idle_calls++;
 
                expires = tick_nohz_stop_sched_tick(ts, now, cpu);
index 3b65746..e07fb09 100644 (file)
@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
        do {
                seq = raw_read_seqcount_latch(&tkf->seq);
                tkr = tkf->base + (seq & 0x01);
-               now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+               now = ktime_to_ns(tkr->base);
+
+               now += clocksource_delta(tkr->read(tkr->clock),
+                                        tkr->cycle_last, tkr->mask);
        } while (read_seqcount_retry(&tkf->seq, seq));
 
        return now;
index f6bd652..107310a 100644 (file)
@@ -23,7 +23,9 @@
 
 #include "timekeeping_internal.h"
 
-static unsigned int sleep_time_bin[32] = {0};
+#define NUM_BINS 32
+
+static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
 {
@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init);
 
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
-       sleep_time_bin[fls(t->tv_sec)]++;
+       /* Cap bin index so we don't overflow the array */
+       int bin = min(fls(t->tv_sec), NUM_BINS-1);
+
+       sleep_time_bin[bin]++;
 }
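The bug being fixed is easiest to see in a standalone model: fls() can return up to 32 for a 32-bit value, which indexes one past the end of a 32-entry histogram unless clamped. A self-contained sketch (fls32() and min_int() stand in for the kernel's fls() and min()):

    #include <stdio.h>

    #define NUM_BINS 32

    static unsigned int sleep_time_bin[NUM_BINS];

    static int fls32(unsigned int x)        /* like the kernel's fls() */
    {
            return x ? 32 - __builtin_clz(x) : 0;
    }

    static int min_int(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
            unsigned int secs = 0x80000000u;        /* fls32() == 32 */

            /* without the clamp this would be sleep_time_bin[32],
             * one element past the end of the array */
            sleep_time_bin[min_int(fls32(secs), NUM_BINS - 1)]++;
            printf("bin %d: %u\n", NUM_BINS - 1,
                   sleep_time_bin[NUM_BINS - 1]);
            return 0;
    }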
 
index 7598e6c..dbafc5d 100644 (file)
@@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
        what |= MASK_TC_BIT(op_flags, META);
        what |= MASK_TC_BIT(op_flags, PREFLUSH);
        what |= MASK_TC_BIT(op_flags, FUA);
-       if (op == REQ_OP_DISCARD)
+       if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
                what |= BLK_TC_ACT(BLK_TC_DISCARD);
        if (op == REQ_OP_FLUSH)
                what |= BLK_TC_ACT(BLK_TC_FLUSH);
index ad35213..5dcb992 100644 (file)
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -59,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
 }
 EXPORT_SYMBOL_GPL(trace_call_bpf);
 
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
-       void *dst = (void *) (long) r1;
-       int ret, size = (int) r2;
-       void *unsafe_ptr = (void *) (long) r3;
+       int ret;
 
        ret = probe_kernel_read(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
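These conversions rely on the BPF_CALL_x() macros introduced earlier in the same series. Roughly (a simplified sketch, not the literal macro expansion), BPF_CALL_3() emits a wrapper taking the five u64 BPF argument registers plus an always-inlined, properly typed body; that ____-prefixed inner function is why ____bpf_perf_event_output() can be called directly further down:

    /* simplified sketch of BPF_CALL_3(bpf_probe_read, void *, dst,
     * u32, size, const void *, unsafe_ptr) */
    static __always_inline
    u64 ____bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr);

    u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
    {
            /* the argument registers are cast back to the declared types */
            return ____bpf_probe_read((void *)(long)r1, (u32)r2,
                                      (const void *)(long)r3);
    }

    static __always_inline
    u64 ____bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
    {
            /* ... helper body, written against typed arguments ... */
            return 0;
    }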
@@ -81,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+          u32, size)
 {
-       void *unsafe_ptr = (void *) (long) r1;
-       void *src = (void *) (long) r2;
-       int size = (int) r3;
-
        /*
         * Ensure we're in user context which is safe for the helper to
         * run. This helper has no business in a kthread.
@@ -128,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
  */
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+          u64, arg2, u64, arg3)
 {
-       char *fmt = (char *) (long) r1;
        bool str_seen = false;
        int mod[3] = {};
        int fmt_cnt = 0;
@@ -176,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
 
                                switch (fmt_cnt) {
                                case 1:
-                                       unsafe_addr = r3;
-                                       r3 = (long) buf;
+                                       unsafe_addr = arg1;
+                                       arg1 = (long) buf;
                                        break;
                                case 2:
-                                       unsafe_addr = r4;
-                                       r4 = (long) buf;
+                                       unsafe_addr = arg2;
+                                       arg2 = (long) buf;
                                        break;
                                case 3:
-                                       unsafe_addr = r5;
-                                       r5 = (long) buf;
+                                       unsafe_addr = arg3;
+                                       arg3 = (long) buf;
                                        break;
                                }
                                buf[0] = 0;
@@ -207,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
        }
 
        return __trace_printk(1/* fake ip will not be printed */, fmt,
-                             mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
-                             mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
-                             mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+                             mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+                             mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+                             mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
 }
 
 static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -231,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
        return &bpf_trace_printk_proto;
 }
 
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 {
-       struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        unsigned int cpu = smp_processor_id();
        u64 index = flags & BPF_F_INDEX_MASK;
@@ -310,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
        return 0;
 }
 
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+          u64, flags, void *, data, u64, size)
 {
-       struct pt_regs *regs = (struct pt_regs *)(long) r1;
-       struct bpf_map *map  = (struct bpf_map *)(long) r2;
-       void *data = (void *)(long) r4;
        struct perf_raw_record raw = {
                .frag = {
                        .size = size,
@@ -365,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
        return __bpf_perf_event_output(regs, map, flags, &raw);
 }
 
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
 {
        return (long) current;
 }
@@ -376,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
        .ret_type       = RET_INTEGER,
 };
 
-static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 {
-       struct bpf_map *map = (struct bpf_map *)(long)r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct cgroup *cgrp;
-       u32 idx = (u32)r2;
 
        if (unlikely(in_interrupt()))
                return -EINVAL;
-
        if (unlikely(idx >= array->map.max_entries))
                return -E2BIG;
 
@@ -479,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
        .type   = BPF_PROG_TYPE_KPROBE,
 };
 
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+          u64, flags, void *, data, u64, size)
 {
+       struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
        /*
         * r1 points to perf tracepoint buffer where first 8 bytes are hidden
         * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
-        * from there and call the same bpf_perf_event_output() helper
+        * from there and call the same bpf_perf_event_output() helper inline.
         */
-       u64 ctx = *(long *)(uintptr_t)r1;
-
-       return bpf_perf_event_output(ctx, r2, index, r4, size);
+       return ____bpf_perf_event_output(regs, map, flags, data, size);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -502,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
        .arg5_type      = ARG_CONST_STACK_SIZE,
 };
 
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+          u64, flags)
 {
-       u64 ctx = *(long *)(uintptr_t)r1;
+       struct pt_regs *regs = *(struct pt_regs **)tp_buff;
 
-       return bpf_get_stackid(ctx, r2, r3, r4, r5);
+       /*
+        * Same comment as in bpf_perf_event_output_tp(), only that this time
+        * the other helper's function body cannot be inlined due to being
+        * external, thus we need to call the raw helper function.
+        */
+       return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+                              flags, 0, 0);
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
@@ -552,10 +551,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
        .type   = BPF_PROG_TYPE_TRACEPOINT,
 };
 
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+                                   enum bpf_reg_type *reg_type)
+{
+       if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+               return false;
+       if (type != BPF_READ)
+               return false;
+       if (off % size != 0)
+               return false;
+       if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
+               if (size != sizeof(u64))
+                       return false;
+       } else {
+               if (size != sizeof(long))
+                       return false;
+       }
+       return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+                                     int src_reg, int ctx_off,
+                                     struct bpf_insn *insn_buf,
+                                     struct bpf_prog *prog)
+{
+       struct bpf_insn *insn = insn_buf;
+
+       switch (ctx_off) {
+       case offsetof(struct bpf_perf_event_data, sample_period):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+                                                      data), dst_reg, src_reg,
+                                     offsetof(struct bpf_perf_event_data_kern, data));
+               *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+                                     offsetof(struct perf_sample_data, period));
+               break;
+       default:
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+                                                      regs), dst_reg, src_reg,
+                                     offsetof(struct bpf_perf_event_data_kern, regs));
+               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+               break;
+       }
+
+       return insn - insn_buf;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {
+       .get_func_proto         = tp_prog_func_proto,
+       .is_valid_access        = pe_prog_is_valid_access,
+       .convert_ctx_access     = pe_prog_convert_ctx_access,
+};
+
+static struct bpf_prog_type_list perf_event_tl = {
+       .ops    = &perf_event_prog_ops,
+       .type   = BPF_PROG_TYPE_PERF_EVENT,
+};
+
 static int __init register_kprobe_prog_ops(void)
 {
        bpf_register_prog_type(&kprobe_tl);
        bpf_register_prog_type(&tracepoint_tl);
+       bpf_register_prog_type(&perf_event_tl);
        return 0;
 }
 late_initcall(register_kprobe_prog_ops);
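With the new type registered, a BPF_PROG_TYPE_PERF_EVENT program sees a struct bpf_perf_event_data context, and both field accesses below are rewritten behind the scenes by pe_prog_convert_ctx_access(). A sketch in the style of samples/bpf (the SEC() section annotation is a loader convention, assumed here):

    #include <linux/bpf_perf_event.h>

    SEC("perf_event")
    int on_sample(struct bpf_perf_event_data *ctx)
    {
            /* rewritten to load bpf_perf_event_data_kern->data->period */
            if (ctx->sample_period < 10000)
                    return 0;
            /* accesses to ctx->regs go through bpf_perf_event_data_kern->regs
             * and can feed helpers such as bpf_get_stackid() */
            return 1;
    }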
index 2307d7c..2e2cca5 100644 (file)
@@ -1686,24 +1686,6 @@ config LATENCYTOP
          Enable this option if you want to use the LatencyTOP tool
          to find out which userspace is blocking on what kernel operations.
 
-config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-       bool
-
-config DEBUG_STRICT_USER_COPY_CHECKS
-       bool "Strict user copy size checks"
-       depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-       depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
-       help
-         Enabling this option turns a certain set of sanity checks for user
-         copy operations into compile time failures.
-
-         The copy_from_user() etc checks are there to help test if there
-         are sufficient security checks on the length argument of
-         the copy operation, by having gcc prove that the argument is
-         within bounds.
-
-         If unsure, say N.
-
 source kernel/trace/Kconfig
 
 menu "Runtime Testing"
index cfa68eb..5dc77a8 100644 (file)
@@ -24,7 +24,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
         is_single_threaded.o plist.o decompress.o kobject_uevent.o \
         earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
 
-obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_HAS_DMA) += dma-noop.o
index 5ba520b..06c2872 100644 (file)
@@ -77,17 +77,18 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
        size = min_t(unsigned int, size, tbl->size >> 1);
 
        if (sizeof(spinlock_t) != 0) {
+               tbl->locks = NULL;
 #ifdef CONFIG_NUMA
                if (size * sizeof(spinlock_t) > PAGE_SIZE &&
                    gfp == GFP_KERNEL)
                        tbl->locks = vmalloc(size * sizeof(spinlock_t));
-               else
 #endif
                if (gfp != GFP_KERNEL)
                        gfp |= __GFP_NOWARN | __GFP_NORETRY;
 
-               tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-                                          gfp);
+               if (!tbl->locks)
+                       tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+                                                  gfp);
                if (!tbl->locks)
                        return -ENOMEM;
                for (i = 0; i < size; i++)
@@ -443,7 +444,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
 struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
                                            const void *key,
                                            struct rhash_head *obj,
-                                           struct bucket_table *tbl)
+                                           struct bucket_table *tbl,
+                                           void **data)
 {
        struct rhash_head *head;
        unsigned int hash;
@@ -454,8 +456,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
        spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 
        err = -EEXIST;
-       if (key && rhashtable_lookup_fast(ht, key, ht->p))
-               goto exit;
+       if (key) {
+               *data = rhashtable_lookup_fast(ht, key, ht->p);
+               if (*data)
+                       goto exit;
+       }
 
        err = -E2BIG;
        if (unlikely(rht_grow_above_max(ht, tbl)))
@@ -489,10 +494,9 @@ exit:
 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 
 /**
- * rhashtable_walk_init - Initialise an iterator
+ * rhashtable_walk_enter - Initialise an iterator
  * @ht:                Table to walk over
  * @iter:      Hash table Iterator
- * @gfp:       GFP flags for allocations
  *
  * This function prepares a hash table walk.
  *
@@ -507,30 +511,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
  * This function may sleep so you must not call it from interrupt
  * context or with spin locks held.
  *
- * You must call rhashtable_walk_exit if this function returns
- * successfully.
+ * You must call rhashtable_walk_exit after this function returns.
  */
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
-                        gfp_t gfp)
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 {
        iter->ht = ht;
        iter->p = NULL;
        iter->slot = 0;
        iter->skip = 0;
 
-       iter->walker = kmalloc(sizeof(*iter->walker), gfp);
-       if (!iter->walker)
-               return -ENOMEM;
-
        spin_lock(&ht->lock);
-       iter->walker->tbl =
+       iter->walker.tbl =
                rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
-       list_add(&iter->walker->list, &iter->walker->tbl->walkers);
+       list_add(&iter->walker.list, &iter->walker.tbl->walkers);
        spin_unlock(&ht->lock);
-
-       return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
 
 /**
  * rhashtable_walk_exit - Free an iterator
@@ -541,10 +537,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
        spin_lock(&iter->ht->lock);
-       if (iter->walker->tbl)
-               list_del(&iter->walker->list);
+       if (iter->walker.tbl)
+               list_del(&iter->walker.list);
        spin_unlock(&iter->ht->lock);
-       kfree(iter->walker);
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 
@@ -570,12 +565,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
        rcu_read_lock();
 
        spin_lock(&ht->lock);
-       if (iter->walker->tbl)
-               list_del(&iter->walker->list);
+       if (iter->walker.tbl)
+               list_del(&iter->walker.list);
        spin_unlock(&ht->lock);
 
-       if (!iter->walker->tbl) {
-               iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
+       if (!iter->walker.tbl) {
+               iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
                return -EAGAIN;
        }
 
@@ -597,7 +592,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-       struct bucket_table *tbl = iter->walker->tbl;
+       struct bucket_table *tbl = iter->walker.tbl;
        struct rhashtable *ht = iter->ht;
        struct rhash_head *p = iter->p;
 
@@ -630,8 +625,8 @@ next:
        /* Ensure we see any new tables. */
        smp_rmb();
 
-       iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-       if (iter->walker->tbl) {
+       iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+       if (iter->walker.tbl) {
                iter->slot = 0;
                iter->skip = 0;
                return ERR_PTR(-EAGAIN);
@@ -651,7 +646,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
        __releases(RCU)
 {
        struct rhashtable *ht;
-       struct bucket_table *tbl = iter->walker->tbl;
+       struct bucket_table *tbl = iter->walker.tbl;
 
        if (!tbl)
                goto out;
@@ -660,9 +655,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 
        spin_lock(&ht->lock);
        if (tbl->rehash < tbl->size)
-               list_add(&iter->walker->list, &tbl->walkers);
+               list_add(&iter->walker.list, &tbl->walkers);
        else
-               iter->walker->tbl = NULL;
+               iter->walker.tbl = NULL;
        spin_unlock(&ht->lock);
 
        iter->p = NULL;
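Putting the reworked walker API together, a hypothetical iteration now looks like this (walk_all() and its table are made-up names); since the walker is embedded in the iterator, the allocation step, and with it the enter-stage error path, is gone:

    static void walk_all(struct rhashtable *ht)
    {
            struct rhashtable_iter iter;
            void *obj;

            rhashtable_walk_enter(ht, &iter);   /* can no longer fail */

            rhashtable_walk_start(&iter);       /* may return -EAGAIN */
            while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                    if (IS_ERR(obj)) {
                            if (PTR_ERR(obj) == -EAGAIN)
                                    continue;   /* table resized; go on */
                            break;
                    }
                    /* ... inspect obj ... */
            }
            rhashtable_walk_stop(&iter);

            rhashtable_walk_exit(&iter);        /* still required */
    }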
index 93f4501..94346b4 100644 (file)
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
        skb->hash = SKB_HASH;
        skb->queue_mapping = SKB_QUEUE_MAP;
        skb->vlan_tci = SKB_VLAN_TCI;
+       skb->vlan_proto = htons(ETH_P_IP);
        skb->dev = &dev;
        skb->dev->ifindex = SKB_DEV_IFINDEX;
        skb->dev->type = SKB_DEV_TYPE;
index 66c5fc8..cac20c5 100644 (file)
@@ -143,7 +143,7 @@ static int __init
 test_hash_init(void)
 {
        char buf[SIZE+1];
-       u32 string_or = 0, hash_or[2][33] = { 0 };
+       u32 string_or = 0, hash_or[2][33] = { { 0, } };
        unsigned tests = 0;
        unsigned long long h64 = 0;
        int i, j;
@@ -219,21 +219,27 @@ test_hash_init(void)
        }
 
        /* Issue notices about skipped tests. */
-#ifndef HAVE_ARCH__HASH_32
-       pr_info("__hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH__HASH_32 != 1
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1
        pr_info("__hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_32
-       pr_info("hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_32 != 1
+#else
+       pr_info("__hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_32
+#if HAVE_ARCH_HASH_32 != 1
        pr_info("hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_64
-       pr_info("hash_64() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_64 != 1
+#else
+       pr_info("hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_64
+#if HAVE_ARCH_HASH_64 != 1
        pr_info("hash_64() is arch-specific; not compared to generic.");
 #endif
+#else
+       pr_info("hash_64() has no arch implementation to test.");
+#endif
 
        pr_notice("%u tests passed.", tests);
 
diff --git a/lib/usercopy.c b/lib/usercopy.c
deleted file mode 100644 (file)
index 4f5b1dd..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <linux/export.h>
-#include <linux/bug.h>
-#include <linux/uaccess.h>
-
-void copy_from_user_overflow(void)
-{
-       WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
index 78a23c5..be0ee11 100644 (file)
@@ -262,7 +262,14 @@ config COMPACTION
        select MIGRATION
        depends on MMU
        help
-         Allows the compaction of memory for the allocation of huge pages.
+          Compaction is the only memory management component able to
+          form high-order (physically contiguous) memory blocks
+          reliably. The page allocator relies on compaction heavily and
+          the lack of the feature can lead to unexpected OOM killer
+          invocations for high-order memory requests. You shouldn't
+          disable this option unless there really is a strong reason
+          for it, and in that case we would be interested to hear
+          about it at linux-mm@kvack.org.
 
 #
 # support for page migration
index 2373f0a..a6abd76 100644 (file)
@@ -1078,7 +1078,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                goto out;
 
        page = pmd_page(*pmd);
-       VM_BUG_ON_PAGE(!PageHead(page), page);
+       VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
        if (flags & FOLL_TOUCH)
                touch_pmd(vma, addr, pmd);
        if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1116,7 +1116,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
        }
 skip_mlock:
        page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-       VM_BUG_ON_PAGE(!PageCompound(page), page);
+       VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
        if (flags & FOLL_GET)
                get_page(page);
 
@@ -1512,7 +1512,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *page;
        pgtable_t pgtable;
        pmd_t _pmd;
-       bool young, write, dirty;
+       bool young, write, dirty, soft_dirty;
        unsigned long addr;
        int i;
 
@@ -1546,6 +1546,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        write = pmd_write(*pmd);
        young = pmd_young(*pmd);
        dirty = pmd_dirty(*pmd);
+       soft_dirty = pmd_soft_dirty(*pmd);
 
        pmdp_huge_split_prepare(vma, haddr, pmd);
        pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1562,6 +1563,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        swp_entry_t swp_entry;
                        swp_entry = make_migration_entry(page + i, write);
                        entry = swp_entry_to_pte(swp_entry);
+                       if (soft_dirty)
+                               entry = pte_swp_mksoft_dirty(entry);
                } else {
                        entry = mk_pte(page + i, vma->vm_page_prot);
                        entry = maybe_mkwrite(entry, vma);
@@ -1569,6 +1572,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                                entry = pte_wrprotect(entry);
                        if (!young)
                                entry = pte_mkold(entry);
+                       if (soft_dirty)
+                               entry = pte_mksoft_dirty(entry);
                }
                if (dirty)
                        SetPageDirty(page + i);
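What this preserves is observable from userspace: soft-dirty is bit 55 of a /proc/<pid>/pagemap entry, and after this fix it survives the split of a huge page. A minimal sketch of the check (assumes a 4 KiB page size and an already opened pagemap file descriptor):

    #include <stdint.h>
    #include <unistd.h>

    static int page_soft_dirty(int pagemap_fd, unsigned long vaddr)
    {
            uint64_t ent;
            off_t off = (vaddr / 4096) * sizeof(ent);

            if (pread(pagemap_fd, &ent, sizeof(ent), off) != sizeof(ent))
                    return -1;
            return (ent >> 55) & 1;     /* the soft-dirty bit */
    }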
index 2ff0289..9a6a51a 100644 (file)
@@ -4082,24 +4082,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
        atomic_add(n, &memcg->id.ref);
 }
 
-static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-{
-       while (!atomic_inc_not_zero(&memcg->id.ref)) {
-               /*
-                * The root cgroup cannot be destroyed, so its refcount must
-                * always be >= 1.
-                */
-               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-                       VM_BUG_ON(1);
-                       break;
-               }
-               memcg = parent_mem_cgroup(memcg);
-               if (!memcg)
-                       memcg = root_mem_cgroup;
-       }
-       return memcg;
-}
-
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
        if (atomic_sub_and_test(n, &memcg->id.ref)) {
@@ -5821,6 +5803,24 @@ static int __init mem_cgroup_init(void)
 subsys_initcall(mem_cgroup_init);
 
 #ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+       while (!atomic_inc_not_zero(&memcg->id.ref)) {
+               /*
+                * The root cgroup cannot be destroyed, so its refcount must
+                * always be >= 1.
+                */
+               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+                       VM_BUG_ON(1);
+                       break;
+               }
+               memcg = parent_mem_cgroup(memcg);
+               if (!memcg)
+                       memcg = root_mem_cgroup;
+       }
+       return memcg;
+}
+
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
  * @page: page whose memsw charge to transfer
index d8c4e38..2da72a5 100644 (file)
@@ -2336,6 +2336,23 @@ out:
        return ret;
 }
 
+/*
+ * Drop the (possibly final) reference to task->mempolicy.  It needs to be
+ * dropped after task->mempolicy is set to NULL so that any allocation done as
+ * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed
+ * policy.
+ */
+void mpol_put_task_policy(struct task_struct *task)
+{
+       struct mempolicy *pol;
+
+       task_lock(task);
+       pol = task->mempolicy;
+       task->mempolicy = NULL;
+       task_unlock(task);
+       mpol_put(pol);
+}
+
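The companion change (not part of this hunk) is for the task-exit path to call the helper instead of open-coding the sequence; a sketch of the intended call site:

    #ifdef CONFIG_NUMA
            mpol_put_task_policy(tsk);  /* replaces task_lock(tsk);
                                         * mpol_put(tsk->mempolicy);
                                         * tsk->mempolicy = NULL;
                                         * task_unlock(tsk); */
    #endif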
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
        pr_debug("deleting %lx-l%lx\n", n->start, n->end);
index 3fbe73a..a2214c6 100644 (file)
@@ -3137,54 +3137,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
-static inline bool
-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
-                    enum compact_result compact_result,
-                    enum compact_priority *compact_priority,
-                    int compaction_retries)
-{
-       int max_retries = MAX_COMPACT_RETRIES;
-
-       if (!order)
-               return false;
-
-       /*
-        * compaction considers all the zone as desperately out of memory
-        * so it doesn't really make much sense to retry except when the
-        * failure could be caused by insufficient priority
-        */
-       if (compaction_failed(compact_result)) {
-               if (*compact_priority > MIN_COMPACT_PRIORITY) {
-                       (*compact_priority)--;
-                       return true;
-               }
-               return false;
-       }
-
-       /*
-        * make sure the compaction wasn't deferred or didn't bail out early
-        * due to locks contention before we declare that we should give up.
-        * But do not retry if the given zonelist is not suitable for
-        * compaction.
-        */
-       if (compaction_withdrawn(compact_result))
-               return compaction_zonelist_suitable(ac, order, alloc_flags);
-
-       /*
-        * !costly requests are much more important than __GFP_REPEAT
-        * costly ones because they are de facto nofail and invoke OOM
-        * killer to move on while costly can fail and users are ready
-        * to cope with that. 1/4 retries is rather arbitrary but we
-        * would need much more detailed feedback from compaction to
-        * make a better decision.
-        */
-       if (order > PAGE_ALLOC_COSTLY_ORDER)
-               max_retries /= 4;
-       if (compaction_retries <= max_retries)
-               return true;
-
-       return false;
-}
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3195,6 +3147,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
+#endif /* CONFIG_COMPACTION */
+
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                     enum compact_result compact_result,
@@ -3221,7 +3175,6 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
        }
        return false;
 }
-#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -4407,7 +4360,7 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
        do {
                zone_type--;
                zone = pgdat->node_zones + zone_type;
-               if (populated_zone(zone)) {
+               if (managed_zone(zone)) {
                        zoneref_set_zone(zone,
                                &zonelist->_zonerefs[nr_zones++]);
                        check_highest_zone(zone_type);
@@ -4645,7 +4598,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
                for (j = 0; j < nr_nodes; j++) {
                        node = node_order[j];
                        z = &NODE_DATA(node)->node_zones[zone_type];
-                       if (populated_zone(z)) {
+                       if (managed_zone(z)) {
                                zoneref_set_zone(z,
                                        &zonelist->_zonerefs[pos++]);
                                check_highest_zone(zone_type);
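The distinction driving these substitutions, simplified from include/linux/mmzone.h: a zone can be populated (physical pages present) yet contribute nothing to the buddy allocator, for instance when all of its memory is consumed by the memmap:

    /* simplified from include/linux/mmzone.h */
    static inline bool populated_zone(struct zone *zone)
    {
            return !!zone->present_pages;   /* physical pages exist */
    }

    static inline bool managed_zone(struct zone *zone)
    {
            return !!zone->managed_pages;   /* pages actually handed to
                                               the buddy allocator */
    }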
index 65ec288..c8a955b 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
 #include <linux/blkdev.h>
@@ -544,6 +545,14 @@ do_readahead(struct address_space *mapping, struct file *filp,
        if (!mapping || !mapping->a_ops)
                return -EINVAL;
 
+       /*
+        * Readahead doesn't make sense for DAX inodes, but we don't want it
+        * to report a failure either.  Instead, we just return success and
+        * don't do any work.
+        */
+       if (dax_mapping(mapping))
+               return 0;
+
        return force_page_cache_readahead(mapping, filp, index, nr);
 }
 
index 8ebae91..089328f 100644 (file)
@@ -83,7 +83,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low,
        unsigned long check_high = check_low + n;
 
        /* Does not overlap if entirely above or entirely below. */
-       if (check_low >= high || check_high < low)
+       if (check_low >= high || check_high <= low)
                return false;
 
        return true;
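The one-character change makes adjacent ranges count as non-overlapping, which is the correct reading for half-open intervals. A standalone check with hypothetical values:

    #include <assert.h>
    #include <stdbool.h>

    /* [check_low, check_high) and [low, high) overlap iff neither lies
     * entirely above or entirely below the other */
    static bool overlaps(unsigned long check_low, unsigned long check_high,
                         unsigned long low, unsigned long high)
    {
            return !(check_low >= high || check_high <= low);
    }

    int main(void)
    {
            /* the old 'check_high < low' test wrongly treated the
             * adjacent pair [0,8) and [8,16) as overlapping */
            assert(!overlaps(0, 8, 8, 16));
            assert(overlaps(0, 9, 8, 16));
            return 0;
    }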
@@ -124,7 +124,7 @@ static inline const char *check_kernel_text_object(const void *ptr,
 static inline const char *check_bogus_address(const void *ptr, unsigned long n)
 {
        /* Reject if object wraps past end of memory. */
-       if (ptr + n < ptr)
+       if ((unsigned long)ptr + n < (unsigned long)ptr)
                return "<wrapped address>";
 
        /* Reject if NULL or ZERO-allocation. */
@@ -134,30 +134,15 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n)
        return NULL;
 }
 
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
-                                           bool to_user)
+/* Checks for allocs that are marked in some way as spanning multiple pages. */
+static inline const char *check_page_span(const void *ptr, unsigned long n,
+                                         struct page *page, bool to_user)
 {
-       struct page *page, *endpage;
+#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
        const void *end = ptr + n - 1;
+       struct page *endpage;
        bool is_reserved, is_cma;
 
-       /*
-        * Some architectures (arm64) return true for virt_addr_valid() on
-        * vmalloced addresses. Work around this by checking for vmalloc
-        * first.
-        */
-       if (is_vmalloc_addr(ptr))
-               return NULL;
-
-       if (!virt_addr_valid(ptr))
-               return NULL;
-
-       page = virt_to_head_page(ptr);
-
-       /* Check slab allocator for flags and size. */
-       if (PageSlab(page))
-               return __check_heap_object(ptr, n, page);
-
        /*
         * Sometimes the kernel data regions are not marked Reserved (see
         * check below). And sometimes [_sdata,_edata) does not cover
@@ -186,7 +171,7 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
                   ((unsigned long)end & (unsigned long)PAGE_MASK)))
                return NULL;
 
-       /* Allow if start and end are inside the same compound page. */
+       /* Allow if fully inside the same compound (__GFP_COMP) page. */
        endpage = virt_to_head_page(end);
        if (likely(endpage == page))
                return NULL;
@@ -199,20 +184,44 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
        is_reserved = PageReserved(page);
        is_cma = is_migrate_cma_page(page);
        if (!is_reserved && !is_cma)
-               goto reject;
+               return "<spans multiple pages>";
 
        for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
                page = virt_to_head_page(ptr);
                if (is_reserved && !PageReserved(page))
-                       goto reject;
+                       return "<spans Reserved and non-Reserved pages>";
                if (is_cma && !is_migrate_cma_page(page))
-                       goto reject;
+                       return "<spans CMA and non-CMA pages>";
        }
+#endif
 
        return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+                                           bool to_user)
+{
+       struct page *page;
+
+       /*
+        * Some architectures (arm64) return true for virt_addr_valid() on
+        * vmalloced addresses. Work around this by checking for vmalloc
+        * first.
+        */
+       if (is_vmalloc_addr(ptr))
+               return NULL;
+
+       if (!virt_addr_valid(ptr))
+               return NULL;
+
+       page = virt_to_head_page(ptr);
+
+       /* Check slab allocator for flags and size. */
+       if (PageSlab(page))
+               return __check_heap_object(ptr, n, page);
 
-reject:
-       return "<spans multiple pages>";
+       /* Verify object does not incorrectly span multiple pages. */
+       return check_page_span(ptr, n, page, to_user);
 }
 
 /*
index 374d95d..b1e12a1 100644 (file)
@@ -1665,7 +1665,7 @@ static bool inactive_reclaimable_pages(struct lruvec *lruvec,
 
        for (zid = sc->reclaim_idx; zid >= 0; zid--) {
                zone = &pgdat->node_zones[zid];
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
@@ -2036,7 +2036,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
                struct zone *zone = &pgdat->node_zones[zid];
                unsigned long inactive_zone, active_zone;
 
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                inactive_zone = zone_page_state(zone,
@@ -2171,7 +2171,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 
                for (z = 0; z < MAX_NR_ZONES; z++) {
                        struct zone *zone = &pgdat->node_zones[z];
-                       if (!populated_zone(zone))
+                       if (!managed_zone(zone))
                                continue;
 
                        total_high_wmark += high_wmark_pages(zone);
@@ -2510,7 +2510,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
        /* If compaction would go ahead or the allocation would succeed, stop */
        for (z = 0; z <= sc->reclaim_idx; z++) {
                struct zone *zone = &pgdat->node_zones[z];
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
@@ -2840,7 +2840,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 
        for (i = 0; i <= ZONE_NORMAL; i++) {
                zone = &pgdat->node_zones[i];
-               if (!populated_zone(zone) ||
+               if (!managed_zone(zone) ||
                    pgdat_reclaimable_pages(pgdat) == 0)
                        continue;
 
@@ -3141,7 +3141,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
        for (i = 0; i <= classzone_idx; i++) {
                struct zone *zone = pgdat->node_zones + i;
 
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                if (!zone_balanced(zone, order, classzone_idx))
@@ -3169,7 +3169,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
        sc->nr_to_reclaim = 0;
        for (z = 0; z <= sc->reclaim_idx; z++) {
                zone = pgdat->node_zones + z;
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
@@ -3242,7 +3242,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                if (buffer_heads_over_limit) {
                        for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
                                zone = pgdat->node_zones + i;
-                               if (!populated_zone(zone))
+                               if (!managed_zone(zone))
                                        continue;
 
                                sc.reclaim_idx = i;
@@ -3262,7 +3262,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                 */
                for (i = classzone_idx; i >= 0; i--) {
                        zone = pgdat->node_zones + i;
-                       if (!populated_zone(zone))
+                       if (!managed_zone(zone))
                                continue;
 
                        if (zone_balanced(zone, sc.order, classzone_idx))
@@ -3508,7 +3508,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        pg_data_t *pgdat;
        int z;
 
-       if (!populated_zone(zone))
+       if (!managed_zone(zone))
                return;
 
        if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
@@ -3522,7 +3522,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        /* Only wake kswapd if all zones are unbalanced */
        for (z = 0; z <= classzone_idx; z++) {
                zone = pgdat->node_zones + z;
-               if (!populated_zone(zone))
+               if (!managed_zone(zone))
                        continue;
 
                if (zone_balanced(zone, order, classzone_idx))
index f066781..10d2bdc 100644 (file)
@@ -1278,7 +1278,7 @@ out:
        return err;
 }
 
-#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE)
+#if IS_ENABLED(CONFIG_IPDDP)
 static __inline__ int is_ip_over_ddp(struct sk_buff *skb)
 {
        return skb->data[12] == 22;
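IS_ENABLED() from <linux/kconfig.h> is true for both built-in (=y) and modular (=m) configurations, so each two-way defined() test in this and the following files collapses to a single condition:

    #if IS_ENABLED(CONFIG_IPDDP)    /* CONFIG_IPDDP=y or CONFIG_IPDDP=m */
    /* ... IP-over-DDP support ... */
    #endif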
index e574a7e..5d26938 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/atmlec.h>
 
 /* Proxy LEC knows about bridging */
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 #include "../bridge/br_private.h"
 
 static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
@@ -121,7 +121,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 /* Device structures */
 static struct net_device *dev_lec[MAX_LEC_ITF];
 
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 {
        char *buff;
@@ -155,7 +155,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
                sk->sk_data_ready(sk);
        }
 }
-#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+#endif /* IS_ENABLED(CONFIG_BRIDGE) */
 
 /*
  * Open/initialize the netdevice. This is called (in the current kernel)
@@ -222,7 +222,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb,
        pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
                 (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
                 (long)skb_end_pointer(skb));
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
        if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
                lec_handle_bridge(skb, dev);
 #endif
@@ -426,7 +426,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
                    (unsigned short)(0xffff & mesg->content.normal.flag);
                break;
        case l_should_bridge:
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
        {
                pr_debug("%s: bridge zeppelin asks about %pM\n",
                         dev->name, mesg->content.proxy.mac_addr);
@@ -452,7 +452,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
                        sk->sk_data_ready(sk);
                }
        }
-#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+#endif /* IS_ENABLED(CONFIG_BRIDGE) */
                break;
        default:
                pr_info("%s: Unknown message type %d\n", dev->name, mesg->type);
index 0e98222..3b3b1a2 100644 (file)
@@ -1007,7 +1007,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
        if (!net_eq(dev_net(dev), &init_net))
                return NOTIFY_DONE;
 
-       if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+       if (strncmp(dev->name, "lec", 3))
                return NOTIFY_DONE; /* we are only interested in lec:s */
 
        switch (event) {
index 833bb14..f20742c 100644 (file)
@@ -73,10 +73,21 @@ config BATMAN_ADV_MCAST
          reduce the air overhead while improving the reliability of
          multicast messages.
 
-config BATMAN_ADV_DEBUG
-       bool "B.A.T.M.A.N. debugging"
+config BATMAN_ADV_DEBUGFS
+       bool "batman-adv debugfs entries"
        depends on BATMAN_ADV
        depends on DEBUG_FS
+       default y
+       help
+         Enable this to export routing related debug tables via debugfs.
+         The information for each soft-interface and used hard-interface can be
+         found under batman_adv/
+
+         If unsure, say Y.
+
+config BATMAN_ADV_DEBUG
+       bool "B.A.T.M.A.N. debugging"
+       depends on BATMAN_ADV_DEBUGFS
        help
          This is an option for use by developers; most people should
          say N here. This enables compilation of support for
index a83fc6c..f724d3c 100644 (file)
@@ -24,14 +24,14 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
 batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o
 batman-adv-y += bitarray.o
 batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
-batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o
 batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
 batman-adv-y += fragmentation.o
 batman-adv-y += gateway_client.o
 batman-adv-y += gateway_common.o
 batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
-batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o
 batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
 batman-adv-y += main.o
 batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
index f2cc50d..623d043 100644 (file)
@@ -101,6 +101,7 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
        return 0;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
        struct batadv_algo_ops *bat_algo_ops;
@@ -113,6 +114,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 
        return 0;
 }
+#endif
 
 static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
 {
index 9ed4f1f..e2d18d0 100644 (file)
@@ -324,17 +324,18 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
        if (!orig_node->bat_iv.bcast_own_sum)
                goto free_orig_node;
 
+       kref_get(&orig_node->refcount);
        hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
                                     batadv_choose_orig, orig_node,
                                     &orig_node->hash_entry);
        if (hash_added != 0)
-               goto free_orig_node;
+               goto free_orig_node_hash;
 
        return orig_node;
 
-free_orig_node:
-       /* free twice, as batadv_orig_node_new sets refcount to 2 */
+free_orig_node_hash:
        batadv_orig_node_put(orig_node);
+free_orig_node:
        batadv_orig_node_put(orig_node);
 
        return NULL;
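The reference-counting shape the batman-adv patches below converge on, sketched with a hypothetical object (the put function follows the orig_node case): kref_init() now accounts only for the caller's reference, and the hash table's reference is taken explicitly right before publication:

    kref_init(&obj->refcount);              /* 1: the caller's reference */

    kref_get(&obj->refcount);               /* +1 for the hash table */
    hash_added = batadv_hash_add(hash, compare, choose, obj,
                                 &obj->hash_entry);
    if (hash_added != 0) {
            batadv_orig_node_put(obj);      /* undo the hash reference */
            batadv_orig_node_put(obj);      /* drop the caller's ref */
            return NULL;
    }
    return obj;                             /* caller still owns one ref */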
@@ -1854,6 +1855,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
        return NET_RX_SUCCESS;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
@@ -1951,6 +1953,7 @@ next:
        if (batman_count == 0)
                seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
  * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
@@ -2181,6 +2184,7 @@ batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
        cb->args[2] = sub;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_iv_hardif_neigh_print - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
@@ -2231,6 +2235,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
        if (batman_count == 0)
                seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
  * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
@@ -2617,6 +2622,7 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /* fails if orig_node has no router */
 static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv,
                                          struct seq_file *seq,
@@ -2680,6 +2686,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
        if (gw_count == 0)
                seq_puts(seq, "No gateways in range ...\n");
 }
+#endif
 
 /**
  * batadv_iv_gw_dump_entry - Dump a gateway into a message
@@ -2797,11 +2804,15 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
        .neigh = {
                .cmp = batadv_iv_ogm_neigh_cmp,
                .is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_iv_neigh_print,
+#endif
                .dump = batadv_iv_ogm_neigh_dump,
        },
        .orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_iv_ogm_orig_print,
+#endif
                .dump = batadv_iv_ogm_orig_dump,
                .free = batadv_iv_ogm_orig_free,
                .add_if = batadv_iv_ogm_orig_add_if,
@@ -2810,7 +2821,9 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
        .gw = {
                .get_best_gw_node = batadv_iv_gw_get_best_gw_node,
                .is_eligible = batadv_iv_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_iv_gw_print,
+#endif
                .dump = batadv_iv_gw_dump,
        },
 };
index 9e872dc..e79f6f0 100644 (file)
@@ -129,6 +129,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
                  batadv_v_elp_throughput_metric_update);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_v_orig_print_neigh - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
@@ -212,6 +213,7 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
        if (batman_count == 0)
                seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
  * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
@@ -345,6 +347,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
        cb->args[1] = idx;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_v_orig_print - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
@@ -411,6 +414,7 @@ next:
        if (batman_count == 0)
                seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
  * batadv_v_orig_dump_subentry - Dump an originator subentry into a
@@ -827,6 +831,7 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /* fails if orig_node has no router */
 static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv,
                                         struct seq_file *seq,
@@ -896,6 +901,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
        if (gw_count == 0)
                seq_puts(seq, "No gateways in range ...\n");
 }
+#endif
 
 /**
  * batadv_v_gw_dump_entry - Dump a gateway into a message
@@ -1034,11 +1040,15 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
                .hardif_init = batadv_v_hardif_neigh_init,
                .cmp = batadv_v_neigh_cmp,
                .is_similar_or_better = batadv_v_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_v_neigh_print,
+#endif
                .dump = batadv_v_neigh_dump,
        },
        .orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_v_orig_print,
+#endif
                .dump = batadv_v_orig_dump,
        },
        .gw = {
@@ -1046,7 +1056,9 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = {
                .show_sel_class = batadv_v_show_sel_class,
                .get_best_gw_node = batadv_v_gw_get_best_gw_node,
                .is_eligible = batadv_v_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
                .print = batadv_v_gw_print,
+#endif
                .dump = batadv_v_gw_dump,
        },
 };
index 6fbba4e..1aeeadc 100644 (file)
@@ -73,13 +73,12 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
        if (!orig_node)
                return NULL;
 
+       kref_get(&orig_node->refcount);
        hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
                                     batadv_choose_orig, orig_node,
                                     &orig_node->hash_entry);
        if (hash_added != 0) {
-               /* orig_node->refcounter is initialised to 2 by
-                * batadv_orig_node_new()
-                */
+               /* remove refcnt for newly created orig_node and hash entry */
                batadv_orig_node_put(orig_node);
                batadv_orig_node_put(orig_node);
                orig_node = NULL;
index 35ed1d3..e7f690b 100644 (file)
@@ -526,11 +526,9 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
        atomic_set(&entry->wait_periods, 0);
        ether_addr_copy(entry->orig, orig);
        INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
-
-       /* one for the hash, one for returning */
        kref_init(&entry->refcount);
-       kref_get(&entry->refcount);
 
+       kref_get(&entry->refcount);
        hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
                                     batadv_compare_backbone_gw,
                                     batadv_choose_backbone_gw, entry,
@@ -718,12 +716,13 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
                claim->lasttime = jiffies;
                kref_get(&backbone_gw->refcount);
                claim->backbone_gw = backbone_gw;
-
                kref_init(&claim->refcount);
-               kref_get(&claim->refcount);
+
                batadv_dbg(BATADV_DBG_BLA, bat_priv,
                           "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
                           mac, BATADV_PRINT_VID(vid));
+
+               kref_get(&claim->refcount);
                hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
                                             batadv_compare_claim,
                                             batadv_choose_claim, claim,
@@ -1997,6 +1996,7 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
  * @seq: seq file to print on
@@ -2057,6 +2057,7 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+#endif
 
 /**
  * batadv_bla_claim_dump_entry - dump one entry of the claim table
@@ -2220,6 +2221,7 @@ out:
        return ret;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
  *  file
@@ -2283,6 +2285,7 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+#endif
 
 /**
  * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
index 1ab4e2e..c68ff3d 100644 (file)
@@ -26,7 +26,7 @@ struct net_device;
 
 #define BATADV_DEBUGFS_SUBDIR "batman_adv"
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
+#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS)
 
 void batadv_debugfs_init(void);
 void batadv_debugfs_destroy(void);
index b1cc8bf..e257efd 100644 (file)
@@ -343,8 +343,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
        ether_addr_copy(dat_entry->mac_addr, mac_addr);
        dat_entry->last_update = jiffies;
        kref_init(&dat_entry->refcount);
-       kref_get(&dat_entry->refcount);
 
+       kref_get(&dat_entry->refcount);
        hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
                                     batadv_hash_dat, dat_entry,
                                     &dat_entry->hash_entry);
@@ -795,6 +795,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
        batadv_dat_hash_free(bat_priv);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_dat_cache_seq_print_text - print the local DAT hash table
  * @seq: seq file to print on
@@ -846,6 +847,7 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+#endif
 
 /**
  * batadv_arp_get_type - parse an ARP packet and gets the type
index c2928c2..de055d6 100644 (file)
@@ -339,14 +339,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
        if (!gw_node)
                return;
 
-       kref_get(&orig_node->refcount);
+       kref_init(&gw_node->refcount);
        INIT_HLIST_NODE(&gw_node->list);
+       kref_get(&orig_node->refcount);
        gw_node->orig_node = orig_node;
        gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
        gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
-       kref_init(&gw_node->refcount);
 
        spin_lock_bh(&bat_priv->gw.list_lock);
+       kref_get(&gw_node->refcount);
        hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
        spin_unlock_bh(&bat_priv->gw.list_lock);
 
@@ -357,6 +358,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
                   ntohl(gateway->bandwidth_down) % 10,
                   ntohl(gateway->bandwidth_up) / 10,
                   ntohl(gateway->bandwidth_up) % 10);
+
+       /* don't return reference to new gw_node */
+       batadv_gw_node_put(gw_node);
 }
 
 /**
@@ -478,6 +482,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
        spin_unlock_bh(&bat_priv->gw.list_lock);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 {
        struct net_device *net_dev = (struct net_device *)seq->private;
@@ -505,6 +510,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 
        return 0;
 }
+#endif
 
 /**
  * batadv_gw_dump - Dump gateways into a message
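
batadv_gw_node_add() now ends by dropping the creator's reference: the node starts at refcount 1 from kref_init(), the gateway list takes its own reference under the list lock, and since the function returns nothing there is no reference left to hand back. A sketch of this "don't return reference" shape, with hypothetical my_* names:

    static void my_node_add(struct my_list *lst, struct my_node *node)
    {
            kref_init(&node->refcount);          /* creator's reference */

            spin_lock_bh(&lst->lock);
            kref_get(&node->refcount);           /* the list's reference */
            hlist_add_head_rcu(&node->node, &lst->head);
            spin_unlock_bh(&lst->lock);

            my_node_put(node);                   /* creator is done; only the list holds it */
    }
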
index 43c9a3e..08ce361 100644 (file)
@@ -694,6 +694,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
        INIT_HLIST_HEAD(&hard_iface->neigh_list);
 
        spin_lock_init(&hard_iface->neigh_list_lock);
+       kref_init(&hard_iface->refcount);
 
        hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
        if (batadv_is_wifi_netdev(net_dev))
@@ -701,11 +702,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
 
        batadv_v_hardif_init(hard_iface);
 
-       /* extra reference for return */
-       kref_init(&hard_iface->refcount);
-       kref_get(&hard_iface->refcount);
-
        batadv_check_known_mac_addr(hard_iface->net_dev);
+       kref_get(&hard_iface->refcount);
        list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
 
        return hard_iface;
@@ -727,7 +725,7 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
        /* first deactivate interface */
        if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
                batadv_hardif_disable_interface(hard_iface,
-                                               BATADV_IF_CLEANUP_AUTO);
+                                               BATADV_IF_CLEANUP_KEEP);
 
        if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
                return;
index 618d5de..e44a7da 100644 (file)
@@ -26,9 +26,25 @@ struct batadv_icmp_header;
 
 #define BATADV_ICMP_SOCKET "socket"
 
-void batadv_socket_init(void);
 int batadv_socket_setup(struct batadv_priv *bat_priv);
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+void batadv_socket_init(void);
 void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
                                  size_t icmp_len);
 
+#else
+
+static inline void batadv_socket_init(void)
+{
+}
+
+static inline void
+batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len)
+{
+}
+
+#endif
+
 #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
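
The static inline stubs are what keep the call sites clean: callers invoke these functions unconditionally and the #ifdef lives only in this header. Caller-side code compiles identically either way (a sketch, assuming icmph and icmp_len are in scope):

    batadv_socket_init();                            /* no-op when debugfs support is compiled out */
    batadv_socket_receive_packet(icmph, icmp_len);   /* likewise */
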
index ef07e5b..2c017ab 100644 (file)
@@ -282,6 +282,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
        return is_my_mac;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_seq_print_text_primary_if_get - called from debugfs table printing
  *  function that requires the primary interface
@@ -317,6 +318,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq)
 out:
        return primary_if;
 }
+#endif
 
 /**
  * batadv_max_header_len - calculate maximum encapsulation overhead for a
index 894df60..13661f4 100644 (file)
@@ -1134,6 +1134,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
                                     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
  * @bat_priv: the bat priv with all the soft interface information
@@ -1234,6 +1235,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
 
        return 0;
 }
+#endif
 
 /**
  * batadv_mcast_free - free the multicast optimizations structures
index 18831e7..64cb6ac 100644 (file)
@@ -62,11 +62,11 @@ enum batadv_netlink_multicast_groups {
        BATADV_NL_MCGRP_TPMETER,
 };
 
-static struct genl_multicast_group batadv_netlink_mcgrps[] = {
+static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
        [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
 
-static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
+static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
        [BATADV_ATTR_VERSION]           = { .type = NLA_STRING },
        [BATADV_ATTR_ALGO_NAME]         = { .type = NLA_STRING },
        [BATADV_ATTR_MESH_IFINDEX]      = { .type = NLA_U32 },
index 293ef4f..e3baf69 100644 (file)
@@ -856,14 +856,12 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
        if (!nc_node)
                return NULL;
 
-       kref_get(&orig_neigh_node->refcount);
-
        /* Initialize nc_node */
        INIT_LIST_HEAD(&nc_node->list);
+       kref_init(&nc_node->refcount);
        ether_addr_copy(nc_node->addr, orig_node->orig);
+       kref_get(&orig_neigh_node->refcount);
        nc_node->orig_node = orig_neigh_node;
-       kref_init(&nc_node->refcount);
-       kref_get(&nc_node->refcount);
 
        /* Select ingoing or outgoing coding node */
        if (in_coding) {
@@ -879,6 +877,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
 
        /* Add nc_node to orig_node */
        spin_lock_bh(lock);
+       kref_get(&nc_node->refcount);
        list_add_tail_rcu(&nc_node->list, list);
        spin_unlock_bh(lock);
 
@@ -979,7 +978,6 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
        INIT_LIST_HEAD(&nc_path->packet_list);
        spin_lock_init(&nc_path->packet_list_lock);
        kref_init(&nc_path->refcount);
-       kref_get(&nc_path->refcount);
        nc_path->last_valid = jiffies;
        ether_addr_copy(nc_path->next_hop, dst);
        ether_addr_copy(nc_path->prev_hop, src);
@@ -989,6 +987,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
                   nc_path->next_hop);
 
        /* Add nc_path to hash table */
+       kref_get(&nc_path->refcount);
        hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
                                     batadv_nc_hash_choose, &nc_path_key,
                                     &nc_path->hash_entry);
@@ -1882,6 +1881,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
        batadv_hash_destroy(bat_priv->nc.decoding_hash);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_nc_nodes_seq_print_text - print the nc node information
  * @seq: seq file to print on
@@ -1981,3 +1981,4 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
 out:
        return -ENOMEM;
 }
+#endif
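
Worth noting in the nc_node hunk above: the list's kref_get() moves inside the spinlocked section, so the reference exists before list_add_tail_rcu() publishes the node to lockless readers. An RCU reader would then typically pair with it like this (a sketch; kref_get_unless_zero() is the stock linux/kref.h helper):

    rcu_read_lock();
    list_for_each_entry_rcu(nc_node, list, list) {
            if (!kref_get_unless_zero(&nc_node->refcount))
                    continue;                /* node is already being torn down */
            /* ... use nc_node ... */
            batadv_nc_node_put(nc_node);
    }
    rcu_read_unlock();
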
index 95c8555..5f3bfc4 100644 (file)
@@ -133,9 +133,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
                goto out;
 
        kref_init(&vlan->refcount);
-       kref_get(&vlan->refcount);
        vlan->vid = vid;
 
+       kref_get(&vlan->refcount);
        hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
 
 out:
@@ -386,6 +386,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
        orig_ifinfo->if_outgoing = if_outgoing;
        INIT_HLIST_NODE(&orig_ifinfo->list);
        kref_init(&orig_ifinfo->refcount);
+
        kref_get(&orig_ifinfo->refcount);
        hlist_add_head_rcu(&orig_ifinfo->list,
                           &orig_node->ifinfo_list);
@@ -459,9 +460,9 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
 
        INIT_HLIST_NODE(&neigh_ifinfo->list);
        kref_init(&neigh_ifinfo->refcount);
-       kref_get(&neigh_ifinfo->refcount);
        neigh_ifinfo->if_outgoing = if_outgoing;
 
+       kref_get(&neigh_ifinfo->refcount);
        hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
 
 out:
@@ -653,8 +654,8 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
 
        /* extra reference for return */
        kref_init(&neigh_node->refcount);
-       kref_get(&neigh_node->refcount);
 
+       kref_get(&neigh_node->refcount);
        hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
 
        batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
@@ -692,6 +693,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
        return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
  * @seq: neighbour table seq_file struct
@@ -725,6 +727,7 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
        bat_priv->algo_ops->neigh.print(bat_priv, seq);
        return 0;
 }
+#endif
 
 /**
  * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
@@ -988,7 +991,6 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
 
        /* extra reference for return */
        kref_init(&orig_node->refcount);
-       kref_get(&orig_node->refcount);
 
        orig_node->bat_priv = bat_priv;
        ether_addr_copy(orig_node->orig, addr);
@@ -1339,6 +1341,7 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
        _batadv_purge_orig(bat_priv);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 {
        struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1412,6 +1415,7 @@ out:
                batadv_hardif_put(hard_iface);
        return 0;
 }
+#endif
 
 /**
  * batadv_orig_dump - Dump to netlink the originator infos for a specific
index e508bf5..49e16b6 100644 (file)
@@ -594,6 +594,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
        }
 
        spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+       kref_get(&vlan->refcount);
        hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
        spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
 
@@ -604,6 +605,9 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
                            bat_priv->soft_iface->dev_addr, vid,
                            BATADV_NULL_IFINDEX, BATADV_NO_MARK);
 
+       /* don't return reference to new softif_vlan */
+       batadv_softif_vlan_put(vlan);
+
        return 0;
 }
 
index 2080407..7f66309 100644 (file)
@@ -734,7 +734,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
        if (batadv_is_wifi_netdev(in_dev))
                tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
        kref_init(&tt_local->common.refcount);
-       kref_get(&tt_local->common.refcount);
        tt_local->last_seen = jiffies;
        tt_local->common.added_at = tt_local->last_seen;
        tt_local->vlan = vlan;
@@ -746,6 +745,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
            is_multicast_ether_addr(addr))
                tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
 
+       kref_get(&tt_local->common.refcount);
        hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
                                     batadv_choose_tt, &tt_local->common,
                                     &tt_local->common.hash_entry);
@@ -1047,6 +1047,7 @@ container_register:
        kfree(tt_data);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 {
        struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1114,6 +1115,7 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+#endif
 
 /**
  * batadv_tt_local_dump_entry - Dump one TT local entry into a message
@@ -1567,9 +1569,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
        orig_entry->orig_node = orig_node;
        orig_entry->ttvn = ttvn;
        kref_init(&orig_entry->refcount);
-       kref_get(&orig_entry->refcount);
 
        spin_lock_bh(&tt_global->list_lock);
+       kref_get(&orig_entry->refcount);
        hlist_add_head_rcu(&orig_entry->list,
                           &tt_global->orig_list);
        spin_unlock_bh(&tt_global->list_lock);
@@ -1645,13 +1647,13 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
                if (flags & BATADV_TT_CLIENT_ROAM)
                        tt_global_entry->roam_at = jiffies;
                kref_init(&common->refcount);
-               kref_get(&common->refcount);
                common->added_at = jiffies;
 
                INIT_HLIST_HEAD(&tt_global_entry->orig_list);
                atomic_set(&tt_global_entry->orig_list_count, 0);
                spin_lock_init(&tt_global_entry->list_lock);
 
+               kref_get(&common->refcount);
                hash_added = batadv_hash_add(bat_priv->tt.global_hash,
                                             batadv_compare_tt,
                                             batadv_choose_tt, common,
@@ -1796,6 +1798,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
        return best_entry;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_tt_global_print_entry - print all orig nodes who announce the address
  *  for this global entry
@@ -1919,6 +1922,7 @@ out:
                batadv_hardif_put(primary_if);
        return 0;
 }
+#endif
 
 /**
  * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
index 3d1cf0f..77654f0 100644 (file)
@@ -257,8 +257,13 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
        spin_lock_bh(&bat_priv->tvlv.container_list_lock);
        tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
        batadv_tvlv_container_remove(bat_priv, tvlv_old);
+
+       kref_get(&tvlv_new->refcount);
        hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
        spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+
+       /* don't return reference to new tvlv_container */
+       batadv_tvlv_container_put(tvlv_new);
 }
 
 /**
@@ -542,8 +547,12 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
        INIT_HLIST_NODE(&tvlv_handler->list);
 
        spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+       kref_get(&tvlv_handler->refcount);
        hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
        spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+
+       /* don't return reference to new tvlv_handler */
+       batadv_tvlv_handler_put(tvlv_handler);
 }
 
 /**
index b5f01a3..b3dd1a3 100644 (file)
@@ -1431,7 +1431,9 @@ struct batadv_algo_neigh_ops {
                                     struct batadv_hard_iface *if_outgoing1,
                                     struct batadv_neigh_node *neigh2,
                                     struct batadv_hard_iface *if_outgoing2);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
        void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+#endif
        void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
                     struct batadv_priv *priv,
                     struct batadv_hard_iface *hard_iface);
@@ -1453,8 +1455,10 @@ struct batadv_algo_orig_ops {
        int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
        int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
                      int del_if_num);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
        void (*print)(struct batadv_priv *priv, struct seq_file *seq,
                      struct batadv_hard_iface *hard_iface);
+#endif
        void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
                     struct batadv_priv *priv,
                     struct batadv_hard_iface *hard_iface);
@@ -1480,7 +1484,9 @@ struct batadv_algo_gw_ops {
        bool (*is_eligible)(struct batadv_priv *bat_priv,
                            struct batadv_orig_node *curr_gw_orig,
                            struct batadv_orig_node *orig_node);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
        void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+#endif
        void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
                     struct batadv_priv *priv);
 };
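
Since the ->print members vanish entirely when CONFIG_BATMAN_ADV_DEBUGFS is unset, any code that dereferences them has to sit under the same guard, which is exactly why the seq_print functions earlier in this patch are wrapped too. Caller shape (a sketch mirroring the neighbour table printer):

    #ifdef CONFIG_BATMAN_ADV_DEBUGFS
            if (bat_priv->algo_ops->neigh.print)
                    bat_priv->algo_ops->neigh.print(bat_priv, seq);
    #endif
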
index ece45e0..0b5f729 100644 (file)
@@ -250,7 +250,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
        skb_free_datagram(sk, skb);
 
-       if (msg->msg_flags & MSG_TRUNC)
+       if (flags & MSG_TRUNC)
                copied = skblen;
 
        return err ? : copied;
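
The one-character-looking change above is a real bug fix: MSG_TRUNC is an input flag here, carried in the `flags` argument the caller passed to recvmsg(), and it requests the true datagram length even when the copy was shortened. msg->msg_flags is the output field the kernel fills in (it gets MSG_TRUNC set to report that truncation happened), so testing it decided the return value from the wrong side of the API. In sketch form:

    /* caller: recv(fd, buf, len, MSG_TRUNC) wants the full datagram length back */
    if (flags & MSG_TRUNC)          /* input: what the caller asked for */
            copied = skblen;
    /* msg->msg_flags |= MSG_TRUNC is how the kernel *reports* truncation */
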
index c045b3c..b0e23df 100644 (file)
@@ -262,6 +262,8 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req,
                break;
        }
 
+       kfree_skb(hdev->req_skb);
+       hdev->req_skb = NULL;
        hdev->req_status = hdev->req_result = 0;
 
        BT_DBG("%s end: err %d", hdev->name, err);
index 6ef8a01..96f04b7 100644 (file)
@@ -1091,7 +1091,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
 
        skb_free_datagram(sk, skb);
 
-       if (msg->msg_flags & MSG_TRUNC)
+       if (flags & MSG_TRUNC)
                copied = skblen;
 
        return err ? : copied;
index 54ceb1f..d4cad29 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/crc16.h>
+#include <linux/filter.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -5835,6 +5836,9 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb,
                if (chan->sdu)
                        break;
 
+               if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE))
+                       break;
+
                chan->sdu_len = get_unaligned_le16(skb->data);
                skb_pull(skb, L2CAP_SDULEN_SIZE);
 
@@ -6610,6 +6614,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
                goto drop;
        }
 
+       if ((chan->mode == L2CAP_MODE_ERTM ||
+            chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
+               goto drop;
+
        if (!control->sframe) {
                int err;
 
index 1842141..a8ba752 100644 (file)
@@ -1019,7 +1019,7 @@ static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg,
                goto done;
 
        if (pi->rx_busy_skb) {
-               if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb))
+               if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
                        pi->rx_busy_skb = NULL;
                else
                        goto done;
@@ -1270,7 +1270,17 @@ static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
                goto done;
        }
 
-       err = sock_queue_rcv_skb(sk, skb);
+       if (chan->mode != L2CAP_MODE_ERTM &&
+           chan->mode != L2CAP_MODE_STREAMING) {
+               /* Even if no filter is attached, we could potentially
+                * get errors from security modules, etc.
+                */
+               err = sk_filter(sk, skb);
+               if (err)
+                       goto done;
+       }
+
+       err = __sock_queue_rcv_skb(sk, skb);
 
        /* For ERTM, handle one skb that doesn't fit into the recv
         * buffer.  This is important to do because the data frames
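
The split introduced here: for non-ERTM/streaming channels the socket layer runs sk_filter() itself and then queues with __sock_queue_rcv_skb(), the variant that does not run the filter again; for ERTM and streaming modes the filter already ran in l2cap_core.c before reassembly, where a dropped frame can still be handled by the retransmission state machine. Filtering a reassembled SDU at queue time instead could silently punch holes in an ERTM stream. Condensed:

    if (chan->mode != L2CAP_MODE_ERTM &&
        chan->mode != L2CAP_MODE_STREAMING) {
            err = sk_filter(sk, skb);        /* filter exactly once, here */
            if (err)
                    goto done;
    }
    err = __sock_queue_rcv_skb(sk, skb);     /* queue without re-running the filter */
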
index a1cda5d..0aefc01 100644 (file)
@@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
 bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
 
+bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
+
 obj-$(CONFIG_NETFILTER) += netfilter/
index 09f2694..89a687f 100644 (file)
@@ -62,10 +62,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
                goto out;
 
        if (is_broadcast_ether_addr(dest)) {
-               br_flood(br, skb, false, false, true);
+               br_flood(br, skb, BR_PKT_BROADCAST, false, true);
        } else if (is_multicast_ether_addr(dest)) {
                if (unlikely(netpoll_tx_running(dev))) {
-                       br_flood(br, skb, false, false, true);
+                       br_flood(br, skb, BR_PKT_MULTICAST, false, true);
                        goto out;
                }
                if (br_multicast_rcv(br, NULL, skb, vid)) {
@@ -78,11 +78,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
                    br_multicast_querier_exists(br, eth_hdr(skb)))
                        br_multicast_flood(mdst, skb, false, true);
                else
-                       br_flood(br, skb, false, false, true);
+                       br_flood(br, skb, BR_PKT_MULTICAST, false, true);
        } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) {
                br_forward(dst->dst, skb, false, true);
        } else {
-               br_flood(br, skb, true, false, true);
+               br_flood(br, skb, BR_PKT_UNICAST, false, true);
        }
 out:
        rcu_read_unlock();
index cd620fa..6b43c8c 100644 (file)
@@ -710,24 +710,27 @@ int br_fdb_dump(struct sk_buff *skb,
                struct netlink_callback *cb,
                struct net_device *dev,
                struct net_device *filter_dev,
-               int idx)
+               int *idx)
 {
        struct net_bridge *br = netdev_priv(dev);
+       int err = 0;
        int i;
 
        if (!(dev->priv_flags & IFF_EBRIDGE))
                goto out;
 
-       if (!filter_dev)
-               idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+       if (!filter_dev) {
+               err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+               if (err < 0)
+                       goto out;
+       }
 
        for (i = 0; i < BR_HASH_SIZE; i++) {
                struct net_bridge_fdb_entry *f;
 
                hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
-                       int err;
 
-                       if (idx < cb->args[0])
+                       if (*idx < cb->args[2])
                                goto skip;
 
                        if (filter_dev &&
@@ -750,17 +753,15 @@ int br_fdb_dump(struct sk_buff *skb,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI);
-                       if (err < 0) {
-                               cb->args[1] = err;
-                               break;
-                       }
+                       if (err < 0)
+                               goto out;
 skip:
-                       ++idx;
+                       *idx += 1;
                }
        }
 
 out:
-       return idx;
+       return err;
 }
 
 /* Update (create or replace) forwarding database entry */
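
br_fdb_dump() now reports errors through the return value and advances the dump position through the `int *idx` out-parameter, so the position survives across the multiple devices a netlink FDB dump walks. A hypothetical, simplified caller under this convention (the real driver loop lives in rtnl_fdb_dump()):

    int idx = 0, err = 0;

    for_each_netdev(net, dev) {
            const struct net_device_ops *ops = dev->netdev_ops;

            if (ops->ndo_fdb_dump)
                    err = ops->ndo_fdb_dump(skb, cb, dev, NULL, &idx);
            if (err < 0)
                    break;                   /* e.g. -EMSGSIZE: skb full, resume on next call */
    }
    cb->args[2] = idx;                       /* remember how far we got */
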
index 63a83d8..7cb41ae 100644 (file)
@@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
        vg = nbp_vlan_group_rcu(p);
        return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
-               br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
+               br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+               nbp_switchdev_allowed_egress(p, skb);
 }
 
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -175,7 +176,7 @@ out:
 
 /* called under rcu_read_lock */
 void br_flood(struct net_bridge *br, struct sk_buff *skb,
-             bool unicast, bool local_rcv, bool local_orig)
+             enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
 {
        u8 igmp_type = br_multicast_igmp_type(skb);
        struct net_bridge_port *prev = NULL;
@@ -183,7 +184,10 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
 
        list_for_each_entry_rcu(p, &br->port_list, list) {
                /* Do not flood unicast traffic to ports that turn it off */
-               if (unicast && !(p->flags & BR_FLOOD))
+               if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
+                       continue;
+               if (pkt_type == BR_PKT_MULTICAST &&
+                   !(p->flags & BR_MCAST_FLOOD))
                        continue;
 
                /* Do not flood to ports that enable proxy ARP */
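
Replacing the old `bool unicast` with enum br_pkt_type lets br_flood() apply a per-type egress policy: unknown-unicast flooding stays gated by BR_FLOOD, multicast flooding by the new per-port BR_MCAST_FLOOD flag, and broadcast always floods. Equivalent logic, written as a switch inside the port loop (a sketch of what the two `if`s above express):

    list_for_each_entry_rcu(p, &br->port_list, list) {
            switch (pkt_type) {
            case BR_PKT_UNICAST:
                    if (!(p->flags & BR_FLOOD))
                            continue;        /* port opted out of unknown-unicast flood */
                    break;
            case BR_PKT_MULTICAST:
                    if (!(p->flags & BR_MCAST_FLOOD))
                            continue;        /* port opted out of multicast flood */
                    break;
            case BR_PKT_BROADCAST:
                    break;                   /* broadcast is always flooded */
            }
            /* ... deliver to p ... */
    }
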
index f2fede0..ed0dd33 100644 (file)
@@ -362,7 +362,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
        p->path_cost = port_cost(dev);
        p->priority = 0x8000 >> BR_PORT_BITS;
        p->port_no = index;
-       p->flags = BR_LEARNING | BR_FLOOD;
+       p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD;
        br_init_port(p);
        br_set_state(p, BR_STATE_DISABLED);
        br_stp_port_timer_init(p);
@@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
        if (err)
                goto err5;
 
+       err = nbp_switchdev_mark_set(p);
+       if (err)
+               goto err6;
+
        dev_disable_lro(dev);
 
        list_add_rcu(&p->list, &br->port_list);
@@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
        err = nbp_vlan_init(p);
        if (err) {
                netdev_err(dev, "failed to initialize vlan filtering on this port\n");
-               goto err6;
+               goto err7;
        }
 
        spin_lock_bh(&br->lock);
@@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
        return 0;
 
-err6:
+err7:
        list_del_rcu(&p->list);
        br_fdb_delete_by_port(br, p, 0, 1);
        nbp_update_port_count(br);
+err6:
        netdev_upper_dev_unlink(dev, br->dev);
-
 err5:
        dev->priv_flags &= ~IFF_BRIDGE_PORT;
        netdev_rx_handler_unregister(dev);
index 8e48620..855b72f 100644 (file)
@@ -80,13 +80,10 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 
        BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
 
-       if (dev->flags & IFF_NOARP)
+       if ((dev->flags & IFF_NOARP) ||
+           !pskb_may_pull(skb, arp_hdr_len(dev)))
                return;
 
-       if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
-               dev->stats.tx_dropped++;
-               return;
-       }
        parp = arp_hdr(skb);
 
        if (parp->ar_pro != htons(ETH_P_IP) ||
@@ -131,11 +128,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 /* note: already called with rcu_read_lock */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-       bool local_rcv = false, mcast_hit = false, unicast = true;
        struct net_bridge_port *p = br_port_get_rcu(skb->dev);
        const unsigned char *dest = eth_hdr(skb)->h_dest;
+       enum br_pkt_type pkt_type = BR_PKT_UNICAST;
        struct net_bridge_fdb_entry *dst = NULL;
        struct net_bridge_mdb_entry *mdst;
+       bool local_rcv, mcast_hit = false;
        struct net_bridge *br;
        u16 vid = 0;
 
@@ -145,29 +143,36 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
                goto out;
 
+       nbp_switchdev_frame_mark(p, skb);
+
        /* insert into forwarding database after filtering to avoid spoofing */
        br = p->br;
        if (p->flags & BR_LEARNING)
                br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
 
-       if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
-           br_multicast_rcv(br, p, skb, vid))
-               goto drop;
+       local_rcv = !!(br->dev->flags & IFF_PROMISC);
+       if (is_multicast_ether_addr(dest)) {
+               /* by definition the broadcast is also a multicast address */
+               if (is_broadcast_ether_addr(dest)) {
+                       pkt_type = BR_PKT_BROADCAST;
+                       local_rcv = true;
+               } else {
+                       pkt_type = BR_PKT_MULTICAST;
+                       if (br_multicast_rcv(br, p, skb, vid))
+                               goto drop;
+               }
+       }
 
        if (p->state == BR_STATE_LEARNING)
                goto drop;
 
        BR_INPUT_SKB_CB(skb)->brdev = br->dev;
 
-       local_rcv = !!(br->dev->flags & IFF_PROMISC);
-
        if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
                br_do_proxy_arp(skb, br, vid, p);
 
-       if (is_broadcast_ether_addr(dest)) {
-               local_rcv = true;
-               unicast = false;
-       } else if (is_multicast_ether_addr(dest)) {
+       switch (pkt_type) {
+       case BR_PKT_MULTICAST:
                mdst = br_mdb_get(br, skb, vid);
                if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
                    br_multicast_querier_exists(br, eth_hdr(skb))) {
@@ -181,18 +186,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
                        local_rcv = true;
                        br->dev->stats.multicast++;
                }
-               unicast = false;
-       } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {
-               /* Do not forward the packet since it's local. */
-               return br_pass_frame_up(skb);
+               break;
+       case BR_PKT_UNICAST:
+               dst = __br_fdb_get(br, dest, vid);
+       default:
+               break;
        }
 
        if (dst) {
+               if (dst->is_local)
+                       return br_pass_frame_up(skb);
+
                dst->used = jiffies;
                br_forward(dst->dst, skb, local_rcv, false);
        } else {
                if (!mcast_hit)
-                       br_flood(br, skb, unicast, local_rcv, false);
+                       br_flood(br, skb, pkt_type, local_rcv, false);
                else
                        br_multicast_flood(mdst, skb, local_rcv, false);
        }
index a5423a1..c5fea93 100644 (file)
@@ -1138,7 +1138,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
                } else {
                        err = br_ip6_multicast_add_group(br, port,
                                                         &grec->grec_mca, vid);
-                       if (!err)
+                       if (err)
                                break;
                }
        }
index f2a29e4..e99037c 100644 (file)
@@ -169,10 +169,15 @@ static int br_port_fill_attrs(struct sk_buff *skb,
            nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
            nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
            nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
-           nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
-           nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
+           nla_put_u8(skb, IFLA_BRPORT_PROTECT,
+                      !!(p->flags & BR_ROOT_BLOCK)) ||
+           nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE,
+                      !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
            nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
-           nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
+           nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD,
+                      !!(p->flags & BR_FLOOD)) ||
+           nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD,
+                      !!(p->flags & BR_MCAST_FLOOD)) ||
            nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
            nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
                       !!(p->flags & BR_PROXYARP_WIFI)) ||
@@ -630,6 +635,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
        br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
        br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
        br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+       br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
        br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
        br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
 
@@ -1245,14 +1251,30 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
        return 0;
 }
 
-static size_t bridge_get_linkxstats_size(const struct net_device *dev)
+static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
 {
-       struct net_bridge *br = netdev_priv(dev);
+       struct net_bridge_port *p = NULL;
        struct net_bridge_vlan_group *vg;
        struct net_bridge_vlan *v;
+       struct net_bridge *br;
        int numvls = 0;
 
-       vg = br_vlan_group(br);
+       switch (attr) {
+       case IFLA_STATS_LINK_XSTATS:
+               br = netdev_priv(dev);
+               vg = br_vlan_group(br);
+               break;
+       case IFLA_STATS_LINK_XSTATS_SLAVE:
+               p = br_port_get_rtnl(dev);
+               if (!p)
+                       return 0;
+               br = p->br;
+               vg = nbp_vlan_group(p);
+               break;
+       default:
+               return 0;
+       }
+
        if (vg) {
                /* we need to count all, even placeholder entries */
                list_for_each_entry(v, &vg->vlan_list, vlist)
@@ -1264,45 +1286,42 @@ static size_t bridge_get_linkxstats_size(const struct net_device *dev)
               nla_total_size(0);
 }
 
-static size_t brport_get_linkxstats_size(const struct net_device *dev)
-{
-       return nla_total_size(sizeof(struct br_mcast_stats)) +
-              nla_total_size(0);
-}
-
-static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
+static int br_fill_linkxstats(struct sk_buff *skb,
+                             const struct net_device *dev,
+                             int *prividx, int attr)
 {
-       size_t retsize = 0;
+       struct nlattr *nla __maybe_unused;
+       struct net_bridge_port *p = NULL;
+       struct net_bridge_vlan_group *vg;
+       struct net_bridge_vlan *v;
+       struct net_bridge *br;
+       struct nlattr *nest;
+       int vl_idx = 0;
 
        switch (attr) {
        case IFLA_STATS_LINK_XSTATS:
-               retsize = bridge_get_linkxstats_size(dev);
+               br = netdev_priv(dev);
+               vg = br_vlan_group(br);
                break;
        case IFLA_STATS_LINK_XSTATS_SLAVE:
-               retsize = brport_get_linkxstats_size(dev);
+               p = br_port_get_rtnl(dev);
+               if (!p)
+                       return 0;
+               br = p->br;
+               vg = nbp_vlan_group(p);
                break;
+       default:
+               return -EINVAL;
        }
 
-       return retsize;
-}
-
-static int bridge_fill_linkxstats(struct sk_buff *skb,
-                                 const struct net_device *dev,
-                                 int *prividx)
-{
-       struct net_bridge *br = netdev_priv(dev);
-       struct nlattr *nla __maybe_unused;
-       struct net_bridge_vlan_group *vg;
-       struct net_bridge_vlan *v;
-       struct nlattr *nest;
-       int vl_idx = 0;
-
        nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
        if (!nest)
                return -EMSGSIZE;
 
-       vg = br_vlan_group(br);
        if (vg) {
+               u16 pvid;
+
+               pvid = br_get_pvid(vg);
                list_for_each_entry(v, &vg->vlan_list, vlist) {
                        struct bridge_vlan_xstats vxi;
                        struct br_vlan_stats stats;
@@ -1311,6 +1330,9 @@ static int bridge_fill_linkxstats(struct sk_buff *skb,
                                continue;
                        memset(&vxi, 0, sizeof(vxi));
                        vxi.vid = v->vid;
+                       vxi.flags = v->flags;
+                       if (v->vid == pvid)
+                               vxi.flags |= BRIDGE_VLAN_INFO_PVID;
                        br_vlan_get_stats(v, &stats);
                        vxi.rx_bytes = stats.rx_bytes;
                        vxi.rx_packets = stats.rx_packets;
@@ -1329,7 +1351,7 @@ static int bridge_fill_linkxstats(struct sk_buff *skb,
                                        BRIDGE_XSTATS_PAD);
                if (!nla)
                        goto nla_put_failure;
-               br_multicast_get_stats(br, NULL, nla_data(nla));
+               br_multicast_get_stats(br, p, nla_data(nla));
        }
 #endif
        nla_nest_end(skb, nest);
@@ -1344,52 +1366,6 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-static int brport_fill_linkxstats(struct sk_buff *skb,
-                                 const struct net_device *dev,
-                                 int *prividx)
-{
-       struct net_bridge_port *p = br_port_get_rtnl(dev);
-       struct nlattr *nla __maybe_unused;
-       struct nlattr *nest;
-
-       if (!p)
-               return 0;
-
-       nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
-       if (!nest)
-               return -EMSGSIZE;
-#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-       nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
-                               sizeof(struct br_mcast_stats),
-                               BRIDGE_XSTATS_PAD);
-       if (!nla) {
-               nla_nest_end(skb, nest);
-               return -EMSGSIZE;
-       }
-       br_multicast_get_stats(p->br, p, nla_data(nla));
-#endif
-       nla_nest_end(skb, nest);
-
-       return 0;
-}
-
-static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
-                             int *prividx, int attr)
-{
-       int ret = -EINVAL;
-
-       switch (attr) {
-       case IFLA_STATS_LINK_XSTATS:
-               ret = bridge_fill_linkxstats(skb, dev, prividx);
-               break;
-       case IFLA_STATS_LINK_XSTATS_SLAVE:
-               ret = brport_fill_linkxstats(skb, dev, prividx);
-               break;
-       }
-
-       return ret;
-}
-
 static struct rtnl_af_ops br_af_ops __read_mostly = {
        .family                 = AF_BRIDGE,
        .get_link_af_size       = br_get_link_af_size_filtered,
index aac2a6e..1b63177 100644 (file)
@@ -251,6 +251,9 @@ struct net_bridge_port
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        struct net_bridge_vlan_group    __rcu *vlgrp;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+       int                             offload_fwd_mark;
+#endif
 };
 
 #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -359,6 +362,11 @@ struct net_bridge
        struct timer_list               gc_timer;
        struct kobject                  *ifobj;
        u32                             auto_cnt;
+
+#ifdef CONFIG_NET_SWITCHDEV
+       int offload_fwd_mark;
+#endif
+
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        struct net_bridge_vlan_group    __rcu *vlgrp;
        u8                              vlan_enabled;
@@ -381,6 +389,10 @@ struct br_input_skb_cb {
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        bool vlan_filtered;
 #endif
+
+#ifdef CONFIG_NET_SWITCHDEV
+       int offload_fwd_mark;
+#endif
 };
 
 #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -496,7 +508,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
               const unsigned char *addr, u16 vid, u16 nlh_flags);
 int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-               struct net_device *dev, struct net_device *fdev, int idx);
+               struct net_device *dev, struct net_device *fdev, int *idx);
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
@@ -505,12 +517,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid);
 
 /* br_forward.c */
+enum br_pkt_type {
+       BR_PKT_UNICAST,
+       BR_PKT_MULTICAST,
+       BR_PKT_BROADCAST
+};
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_forward(const struct net_bridge_port *to, struct sk_buff *skb,
                bool local_rcv, bool local_orig);
 int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_flood(struct net_bridge *br, struct sk_buff *skb,
-             bool unicast, bool local_rcv, bool local_orig);
+             enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
 
 /* br_if.c */
 void br_port_carrier_check(struct net_bridge_port *p);
@@ -1034,4 +1051,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; }
 static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 #endif /* CONFIG_SYSFS */
 
+/* br_switchdev.c */
+#ifdef CONFIG_NET_SWITCHDEV
+int nbp_switchdev_mark_set(struct net_bridge_port *p);
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+                             struct sk_buff *skb);
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+                                 const struct sk_buff *skb);
+#else
+static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+       return 0;
+}
+
+static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+                                           struct sk_buff *skb)
+{
+}
+
+static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+                                               const struct sk_buff *skb)
+{
+       return true;
+}
+#endif /* CONFIG_NET_SWITCHDEV */
+
 #endif
index 341caa0..d8ad73b 100644 (file)
@@ -134,17 +134,36 @@ void br_stp_disable_port(struct net_bridge_port *p)
                br_become_root_bridge(br);
 }
 
-static void br_stp_start(struct net_bridge *br)
+static int br_stp_call_user(struct net_bridge *br, char *arg)
 {
-       int r;
-       char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+       char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL };
        char *envp[] = { NULL };
+       int rc;
+
+       /* call userspace STP and report program errors */
+       rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+       if (rc > 0) {
+               if (rc & 0xff)
+                       br_debug(br, BR_STP_PROG " received signal %d\n",
+                                rc & 0x7f);
+               else
+                       br_debug(br, BR_STP_PROG " exited with code %d\n",
+                                (rc >> 8) & 0xff);
+       }
+
+       return rc;
+}
+
+static void br_stp_start(struct net_bridge *br)
+{
        struct net_bridge_port *p;
+       int err = -ENOENT;
 
        if (net_eq(dev_net(br->dev), &init_net))
-               r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
-       else
-               r = -ENOENT;
+               err = br_stp_call_user(br, "start");
+
+       if (err && err != -ENOENT)
+               br_err(br, "failed to start userspace STP (%d)\n", err);
 
        spin_lock_bh(&br->lock);
 
@@ -153,9 +172,10 @@ static void br_stp_start(struct net_bridge *br)
        else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
                __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
 
-       if (r == 0) {
+       if (!err) {
                br->stp_enabled = BR_USER_STP;
                br_debug(br, "userspace STP started\n");
+
                /* Stop hello and hold timers */
                del_timer(&br->hello_timer);
                list_for_each_entry(p, &br->port_list, list)
@@ -173,14 +193,13 @@ static void br_stp_start(struct net_bridge *br)
 
 static void br_stp_stop(struct net_bridge *br)
 {
-       int r;
-       char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
-       char *envp[] = { NULL };
        struct net_bridge_port *p;
+       int err;
 
        if (br->stp_enabled == BR_USER_STP) {
-               r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
-               br_info(br, "userspace STP stopped, return code %d\n", r);
+               err = br_stp_call_user(br, "stop");
+               if (err)
+                       br_err(br, "failed to stop userspace STP (%d)\n", err);
 
                /* To start timers on any ports left in blocking */
                mod_timer(&br->hello_timer, jiffies + br->hello_time);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
new file mode 100644 (file)
index 0000000..f4097b9
--- /dev/null
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
+{
+       struct net_bridge_port *p;
+
+       /* dev is yet to be added to the port list. */
+       list_for_each_entry(p, &br->port_list, list) {
+               if (switchdev_port_same_parent_id(dev, p->dev))
+                       return p->offload_fwd_mark;
+       }
+
+       return ++br->offload_fwd_mark;
+}
+
+int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+       struct switchdev_attr attr = {
+               .orig_dev = p->dev,
+               .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       err = switchdev_port_attr_get(p->dev, &attr);
+       if (err) {
+               if (err == -EOPNOTSUPP)
+                       return 0;
+               return err;
+       }
+
+       p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+
+       return 0;
+}
+
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+                             struct sk_buff *skb)
+{
+       if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
+               BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+}
+
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+                                 const struct sk_buff *skb)
+{
+       return !skb->offload_fwd_mark ||
+              BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+}
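
The idea behind the new file: every bridge port whose device reports the same switchdev parent id (i.e. sits on the same offloading ASIC) shares one offload_fwd_mark; ingress stamps that mark into the skb's bridge control block, and egress towards ports with the same mark is suppressed, because the ASIC already forwarded the frame internally. How the three hooks fit together (a sketch; the real calls are wired into br_add_if(), br_handle_frame_finish() and should_deliver() elsewhere in this patch):

    err = nbp_switchdev_mark_set(p);         /* on join: inherit siblings' mark or mint one */

    nbp_switchdev_frame_mark(p, skb);        /* on ingress: record which ASIC saw the frame */

    if (!nbp_switchdev_allowed_egress(p, skb))
            continue;                        /* same ASIC: hardware already delivered it */
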
index 1e04d4d..e657258 100644 (file)
@@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(learning, BR_LEARNING);
 BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
 BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
 BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
+BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
index cceac5b..0833c25 100644 (file)
@@ -368,6 +368,8 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 
        match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
        if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
+               if (!IS_ERR(match))
+                       module_put(match->me);
                request_module("ebt_%s", m->u.name);
                match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
        }
index 5d9953a..1663df5 100644 (file)
@@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = {
 
 static int __net_init nf_log_bridge_net_init(struct net *net)
 {
-       nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
-       return 0;
+       return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
 }
 
 static void __net_exit nf_log_bridge_net_exit(struct net *net)
index 4b901d9..ad47a92 100644 (file)
@@ -86,6 +86,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = {
        .init           = nft_meta_set_init,
        .destroy        = nft_meta_set_destroy,
        .dump           = nft_meta_set_dump,
+       .validate       = nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
index a75df86..9dbece2 100644 (file)
@@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
        else
                skb_dst_force(skb);
 
-#ifdef CONFIG_NET_SWITCHDEV
-       /* Don't forward if offload device already forwarded */
-       if (skb->offload_fwd_mark &&
-           skb->offload_fwd_mark == dev->offload_fwd_mark) {
-               consume_skb(skb);
-               rc = NET_XMIT_SUCCESS;
-               goto out;
-       }
-#endif
-
        txq = netdev_pick_tx(dev, skb, accel_priv);
        q = rcu_dereference_bh(txq->qdisc);
 
@@ -3914,8 +3904,7 @@ static void net_tx_action(struct softirq_action *h)
        }
 }
 
-#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
-    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
                             unsigned char *addr) __read_mostly;
@@ -3974,6 +3963,22 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
        return skb;
 }
 
+/**
+ *     netdev_is_rx_handler_busy - check if receive handler is registered
+ *     @dev: device to check
+ *
+ *     Check if a receive handler is already registered for a given device.
+ *     Return true if there is one.
+ *
+ *     The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+       ASSERT_RTNL();
+       return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
 /**
  *     netdev_rx_handler_register - register receive handler
  *     @dev: device to register a handler for
@@ -4292,32 +4297,53 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
+DEFINE_PER_CPU(struct work_struct, flush_works);
+
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
 {
-       struct net_device *dev = arg;
-       struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        struct sk_buff *skb, *tmp;
+       struct softnet_data *sd;
+
+       local_bh_disable();
+       sd = this_cpu_ptr(&softnet_data);
 
+       local_irq_disable();
        rps_lock(sd);
        skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
-               if (skb->dev == dev) {
+               if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->input_pkt_queue);
                        kfree_skb(skb);
                        input_queue_head_incr(sd);
                }
        }
        rps_unlock(sd);
+       local_irq_enable();
 
        skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
-               if (skb->dev == dev) {
+               if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->process_queue);
                        kfree_skb(skb);
                        input_queue_head_incr(sd);
                }
        }
+       local_bh_enable();
+}
+
+static void flush_all_backlogs(void)
+{
+       unsigned int cpu;
+
+       get_online_cpus();
+
+       for_each_online_cpu(cpu)
+               queue_work_on(cpu, system_highpri_wq,
+                             per_cpu_ptr(&flush_works, cpu));
+
+       for_each_online_cpu(cpu)
+               flush_work(per_cpu_ptr(&flush_works, cpu));
+
+       put_online_cpus();
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -4805,8 +4831,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
 
 static int process_backlog(struct napi_struct *napi, int quota)
 {
-       int work = 0;
        struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+       bool again = true;
+       int work = 0;
 
        /* Check if we have pending ipi, its better to send them now,
         * not waiting net_rx_action() end.
@@ -4817,23 +4844,20 @@ static int process_backlog(struct napi_struct *napi, int quota)
        }
 
        napi->weight = weight_p;
-       local_irq_disable();
-       while (1) {
+       while (again) {
                struct sk_buff *skb;
 
                while ((skb = __skb_dequeue(&sd->process_queue))) {
                        rcu_read_lock();
-                       local_irq_enable();
                        __netif_receive_skb(skb);
                        rcu_read_unlock();
-                       local_irq_disable();
                        input_queue_head_incr(sd);
-                       if (++work >= quota) {
-                               local_irq_enable();
+                       if (++work >= quota)
                                return work;
-                       }
+
                }
 
+               local_irq_disable();
                rps_lock(sd);
                if (skb_queue_empty(&sd->input_pkt_queue)) {
                        /*
@@ -4845,16 +4869,14 @@ static int process_backlog(struct napi_struct *napi, int quota)
                         * and we dont need an smp_mb() memory barrier.
                         */
                        napi->state = 0;
-                       rps_unlock(sd);
-
-                       break;
+                       again = false;
+               } else {
+                       skb_queue_splice_tail_init(&sd->input_pkt_queue,
+                                                  &sd->process_queue);
                }
-
-               skb_queue_splice_tail_init(&sd->input_pkt_queue,
-                                          &sd->process_queue);
                rps_unlock(sd);
+               local_irq_enable();
        }
-       local_irq_enable();
 
        return work;
 }
@@ -6707,8 +6729,8 @@ static void rollback_registered_many(struct list_head *head)
                unlist_netdevice(dev);
 
                dev->reg_state = NETREG_UNREGISTERING;
-               on_each_cpu(flush_backlog, dev, 1);
        }
+       flush_all_backlogs();
 
        synchronize_net();
 
@@ -8273,8 +8295,11 @@ static int __init net_dev_init(void)
         */
 
        for_each_possible_cpu(i) {
+               struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+               INIT_WORK(flush, flush_backlog);
+
                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
                INIT_LIST_HEAD(&sd->poll_list);
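
The backlog flush changes shape here: instead of an IPI via on_each_cpu() that walked both queues with interrupts off, each CPU gets a work_struct queued on system_highpri_wq, and flush_all_backlogs() waits for them all, keeping IRQ-off sections short and letting unregistration flush every CPU once rather than once per device. The bare pattern (a sketch of the workqueue mechanics used above):

    static DEFINE_PER_CPU(struct work_struct, flush_works);

    /* once, at init time */
    for_each_possible_cpu(cpu)
            INIT_WORK(per_cpu_ptr(&flush_works, cpu), flush_backlog);

    /* fan out, then wait */
    for_each_online_cpu(cpu)
            queue_work_on(cpu, system_highpri_wq, per_cpu_ptr(&flush_works, cpu));
    for_each_online_cpu(cpu)
            flush_work(per_cpu_ptr(&flush_works, cpu));
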
index d6b3b57..72cfb0c 100644 (file)
@@ -105,7 +105,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
        return skb;
 }
 
-static struct genl_multicast_group dropmon_mcgrps[] = {
+static const struct genl_multicast_group dropmon_mcgrps[] = {
        { .name = "events", },
 };
 
index cb06ace..298b146 100644 (file)
@@ -94,14 +94,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 }
 EXPORT_SYMBOL(sk_filter_trim_cap);
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
 {
-       return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+       return skb_get_poff(skb);
 }
 
-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
 {
-       struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
        struct nlattr *nla;
 
        if (skb_is_nonlinear(skb))
@@ -120,9 +119,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
        return 0;
 }
 
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
 {
-       struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
        struct nlattr *nla;
 
        if (skb_is_nonlinear(skb))
@@ -145,7 +143,7 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
        return 0;
 }
 
-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_0(__get_raw_cpu_id)
 {
        return raw_smp_processor_id();
 }
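
The BPF_CALL_n() conversions that follow are mechanical but worthwhile: the macro generates the five-u64-argument trampoline the BPF calling convention demands, while the helper body is written with real types, so the hand-rolled `(struct sk_buff *)(unsigned long) ctx` casts disappear. Defining a new helper then looks like this (bpf_skb_first_byte is a made-up example, not an in-tree helper):

    /* expands to a u64 fn(u64, u64, u64, u64, u64) wrapper plus this typed body */
    BPF_CALL_2(bpf_skb_first_byte, struct sk_buff *, skb, u32, offset)
    {
            u8 byte;

            if (skb_copy_bits(skb, offset, &byte, 1) < 0)
                    return -EFAULT;          /* offset past the end of the skb */
            return byte;
    }
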
@@ -233,9 +231,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
        case SKF_AD_OFF + SKF_AD_HATYPE:
                BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
                BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-               BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
 
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
                                      BPF_REG_TMP, BPF_REG_CTX,
                                      offsetof(struct sk_buff, dev));
                /* if (tmp != 0) goto pc + 1 */
@@ -1350,14 +1347,18 @@ struct bpf_scratchpad {
 
 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
 
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+                                         unsigned int write_len)
+{
+       return skb_ensure_writable(skb, write_len);
+}
+
 static inline int bpf_try_make_writable(struct sk_buff *skb,
                                        unsigned int write_len)
 {
-       int err;
+       int err = __bpf_try_make_writable(skb, write_len);
 
-       err = skb_ensure_writable(skb, write_len);
        bpf_compute_data_end(skb);
-
        return err;
 }
 
@@ -1373,12 +1374,9 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
                skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
 }
 
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+          const void *, from, u32, len, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       unsigned int offset = (unsigned int) r2;
-       void *from = (void *) (long) r3;
-       unsigned int len = (unsigned int) r4;
        void *ptr;
 
        if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
@@ -1413,12 +1411,9 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+          void *, to, u32, len)
 {
-       const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
-       unsigned int offset = (unsigned int) r2;
-       void *to = (void *)(unsigned long) r3;
-       unsigned int len = (unsigned int) r4;
        void *ptr;
 
        if (unlikely(offset > 0xffff))
@@ -1446,10 +1441,9 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
        .arg4_type      = ARG_CONST_STACK_SIZE,
 };
 
-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
+          u64, from, u64, to, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       unsigned int offset = (unsigned int) r2;
        __sum16 *ptr;
 
        if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
@@ -1491,12 +1485,11 @@ static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+          u64, from, u64, to, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
        bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
        bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
-       unsigned int offset = (unsigned int) r2;
        __sum16 *ptr;
 
        if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
@@ -1544,12 +1537,11 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
+BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
+          __be32 *, to, u32, to_size, __wsum, seed)
 {
        struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
-       u64 diff_size = from_size + to_size;
-       __be32 *from = (__be32 *) (long) r1;
-       __be32 *to   = (__be32 *) (long) r3;
+       u32 diff_size = from_size + to_size;
        int i, j = 0;
 
        /* This is quite flexible, some examples:
@@ -1607,9 +1599,8 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
        return ret;
 }
 
-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
        struct net_device *dev;
 
        if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -1645,7 +1636,7 @@ struct redirect_info {
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
 
-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
        struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 
@@ -1684,9 +1675,9 @@ static const struct bpf_func_proto bpf_redirect_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
-       return task_get_classid((struct sk_buff *) (unsigned long) r1);
+       return task_get_classid(skb);
 }
 
 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
@@ -1696,9 +1687,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
 {
-       return dst_tclassid((struct sk_buff *) (unsigned long) r1);
+       return dst_tclassid(skb);
 }
 
 static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1708,14 +1699,14 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
 {
        /* If skb_clear_hash() was called due to mangling, we can
         * trigger SW recalculation here. Later access to hash
         * can then use the inline skb->hash via context directly
         * instead of calling this helper again.
         */
-       return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+       return skb_get_hash(skb);
 }
 
 static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
@@ -1725,10 +1716,9 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
+          u16, vlan_tci)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       __be16 vlan_proto = (__force __be16) r2;
        int ret;
 
        if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
@@ -1753,9 +1743,8 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = {
 };
 EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
 
-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
        int ret;
 
        bpf_push_mac_rcsum(skb);
@@ -1930,10 +1919,9 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
        return -ENOTSUPP;
 }
 
-static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
+          u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       __be16 proto = (__force __be16) r2;
        int ret;
 
        if (unlikely(flags))
@@ -1970,14 +1958,11 @@ static const struct bpf_func_proto bpf_skb_change_proto_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       u32 pkt_type = r2;
-
        /* We only allow a restricted subset to be changed for now. */
-       if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
-                    pkt_type > PACKET_OTHERHOST))
+       if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+                    !skb_pkt_type_ok(pkt_type)))
                return -EINVAL;
 
        skb->pkt_type = pkt_type;
@@ -1992,6 +1977,90 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+       u32 min_len = skb_network_offset(skb);
+
+       if (skb_transport_header_was_set(skb))
+               min_len = skb_transport_offset(skb);
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               min_len = skb_checksum_start_offset(skb) +
+                         skb->csum_offset + sizeof(__sum16);
+       return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+       return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+       unsigned int old_len = skb->len;
+       int ret;
+
+       ret = __skb_grow_rcsum(skb, new_len);
+       if (!ret)
+               memset(skb->data + old_len, 0, new_len - old_len);
+       return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+       return __skb_trim_rcsum(skb, new_len);
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+          u64, flags)
+{
+       u32 max_len = __bpf_skb_max_len(skb);
+       u32 min_len = __bpf_skb_min_len(skb);
+       int ret;
+
+       if (unlikely(flags || new_len > max_len || new_len < min_len))
+               return -EINVAL;
+       if (skb->encapsulation)
+               return -ENOTSUPP;
+
+       /* The basic idea of this helper is that it's performing the
+        * needed work to either grow or trim an skb, and eBPF program
+        * rewrites the rest via helpers like bpf_skb_store_bytes(),
+        * bpf_lX_csum_replace() and others rather than passing a raw
+        * buffer here. This one is a slow path helper and intended
+        * for replies with control messages.
+        *
+        * Like in bpf_skb_change_proto(), we want to keep this rather
+        * minimal and without protocol specifics so that we are able
+        * to separate concerns as in bpf_skb_store_bytes() should only
+        * be the one responsible for writing buffers.
+        *
+        * It's really expected to be a slow path operation here for
+        * control message replies, so we're implicitly linearizing,
+        * uncloning and drop offloads from the skb by this.
+        */
+       ret = __bpf_try_make_writable(skb, skb->len);
+       if (!ret) {
+               if (new_len > skb->len)
+                       ret = bpf_skb_grow_rcsum(skb, new_len);
+               else if (new_len < skb->len)
+                       ret = bpf_skb_trim_rcsum(skb, new_len);
+               if (!ret && skb_is_gso(skb))
+                       skb_gso_reset(skb);
+       }
+
+       bpf_compute_data_end(skb);
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+       .func           = bpf_skb_change_tail,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
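
The constraints on the new helper are all visible above: flags must be
zero, new_len is bounded by __bpf_skb_min_len()/__bpf_skb_max_len(), and
grown space is zeroed. A hypothetical tc classifier fragment using it
(SEC(), TC_ACT_* and the helper declaration are assumed to come from the
usual samples/bpf-style headers):

	SEC("classifier")
	int trim_reply(struct __sk_buff *skb)
	{
		const __u32 new_len = 64;	/* illustrative size */

		/* slow path: linearizes, unclones and drops offloads */
		if (bpf_skb_change_tail(skb, new_len, 0) < 0)
			return TC_ACT_SHOT;

		/* rewrite the payload with bpf_skb_store_bytes() and
		 * friends, as the comment in the helper suggests
		 */
		return TC_ACT_OK;
	}
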
+
 bool bpf_helper_changes_skb_data(void *func)
 {
        if (func == bpf_skb_vlan_push)
@@ -2002,6 +2071,8 @@ bool bpf_helper_changes_skb_data(void *func)
                return true;
        if (func == bpf_skb_change_proto)
                return true;
+       if (func == bpf_skb_change_tail)
+               return true;
        if (func == bpf_l3_csum_replace)
                return true;
        if (func == bpf_l4_csum_replace)
@@ -2023,13 +2094,10 @@ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
        return 0;
 }
 
-static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
-                               u64 meta_size)
+BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
+          u64, flags, void *, meta, u64, meta_size)
 {
-       struct sk_buff *skb = (struct sk_buff *)(long) r1;
-       struct bpf_map *map = (struct bpf_map *)(long) r2;
        u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
-       void *meta = (void *)(long) r4;
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
@@ -2056,10 +2124,9 @@ static unsigned short bpf_tunnel_key_af(u64 flags)
        return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
 }
 
-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
+          u32, size, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
        const struct ip_tunnel_info *info = skb_tunnel_info(skb);
        u8 compat[sizeof(struct bpf_tunnel_key)];
        void *to_orig = to;
@@ -2124,10 +2191,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       u8 *to = (u8 *) (long) r2;
        const struct ip_tunnel_info *info = skb_tunnel_info(skb);
        int err;
 
@@ -2162,10 +2227,9 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
 
 static struct metadata_dst __percpu *md_dst;
 
-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
+          const struct bpf_tunnel_key *, from, u32, size, u64, flags)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
        struct metadata_dst *md = this_cpu_ptr(md_dst);
        u8 compat[sizeof(struct bpf_tunnel_key)];
        struct ip_tunnel_info *info;
@@ -2183,7 +2247,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
                         */
                        memcpy(compat, from, size);
                        memset(compat + size, 0, sizeof(compat) - size);
-                       from = (struct bpf_tunnel_key *)compat;
+                       from = (const struct bpf_tunnel_key *) compat;
                        break;
                default:
                        return -EINVAL;
@@ -2233,10 +2297,9 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
        .arg4_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+          const u8 *, from, u32, size)
 {
-       struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       u8 *from = (u8 *) (long) r2;
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
        const struct metadata_dst *md = this_cpu_ptr(md_dst);
 
@@ -2282,28 +2345,24 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
        }
 }
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
+          u32, idx)
 {
-       struct sk_buff *skb = (struct sk_buff *)(long)r1;
-       struct bpf_map *map = (struct bpf_map *)(long)r2;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct cgroup *cgrp;
        struct sock *sk;
-       u32 i = (u32)r3;
 
        sk = skb->sk;
        if (!sk || !sk_fullsock(sk))
                return -ENOENT;
-
-       if (unlikely(i >= array->map.max_entries))
+       if (unlikely(idx >= array->map.max_entries))
                return -E2BIG;
 
-       cgrp = READ_ONCE(array->ptrs[i]);
+       cgrp = READ_ONCE(array->ptrs[idx]);
        if (unlikely(!cgrp))
                return -EAGAIN;
 
-       return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+       return sk_under_cgroup_hierarchy(sk, cgrp);
 }
 
 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
@@ -2314,7 +2373,38 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
        .arg2_type      = ARG_CONST_MAP_PTR,
        .arg3_type      = ARG_ANYTHING,
 };
-#endif
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+                                 unsigned long off, unsigned long len)
+{
+       memcpy(dst_buff, src_buff + off, len);
+       return 0;
+}
+
+BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+          u64, flags, void *, meta, u64, meta_size)
+{
+       u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+
+       if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+               return -EINVAL;
+       if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+               return -EFAULT;
+
+       return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+                               bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+       .func           = bpf_xdp_event_output,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_STACK,
+       .arg5_type      = ARG_CONST_STACK_SIZE,
+};
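
With bpf_xdp_event_output() wired to BPF_FUNC_perf_event_output below,
an XDP program can push packet samples into a perf ring. A hypothetical
fragment (map definition style as in samples/bpf; names and sizes are
illustrative):

	struct bpf_map_def SEC("maps") perf_map = {
		.type        = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size    = sizeof(int),
		.value_size  = sizeof(__u32),
		.max_entries = 64,	/* >= number of possible CPUs */
	};

	SEC("xdp")
	int xdp_sample(struct xdp_md *ctx)
	{
		__u64 flags = BPF_F_CURRENT_CPU;
		__u16 sample_len = 64;
		struct { __u16 cookie; __u16 len; } meta = {
			0xdead, sample_len,
		};

		/* the upper 32 bits of flags (BPF_F_CTXLEN_MASK) select
		 * how many bytes of ctx data to append after meta
		 */
		flags |= (__u64) sample_len << 32;
		bpf_perf_event_output(ctx, &perf_map, flags,
				      &meta, sizeof(meta));
		return XDP_PASS;
	}
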
 
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
@@ -2368,6 +2458,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_skb_change_proto_proto;
        case BPF_FUNC_skb_change_type:
                return &bpf_skb_change_type_proto;
+       case BPF_FUNC_skb_change_tail:
+               return &bpf_skb_change_tail_proto;
        case BPF_FUNC_skb_get_tunnel_key:
                return &bpf_skb_get_tunnel_key_proto;
        case BPF_FUNC_skb_set_tunnel_key:
@@ -2386,10 +2478,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_skb_event_output_proto;
        case BPF_FUNC_get_smp_processor_id:
                return &bpf_get_smp_processor_id_proto;
-#ifdef CONFIG_SOCK_CGROUP_DATA
        case BPF_FUNC_skb_under_cgroup:
                return &bpf_skb_under_cgroup_proto;
-#endif
        default:
                return sk_filter_func_proto(func_id);
        }
@@ -2398,7 +2488,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 static const struct bpf_func_proto *
 xdp_func_proto(enum bpf_func_id func_id)
 {
-       return sk_filter_func_proto(func_id);
+       switch (func_id) {
+       case BPF_FUNC_perf_event_output:
+               return &bpf_xdp_event_output_proto;
+       default:
+               return sk_filter_func_proto(func_id);
+       }
 }
 
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
@@ -2475,7 +2570,7 @@ static bool __is_valid_xdp_access(int off, int size,
                return false;
        if (off % size != 0)
                return false;
-       if (size != 4)
+       if (size != sizeof(__u32))
                return false;
 
        return true;
@@ -2506,10 +2601,10 @@ void bpf_warn_invalid_xdp_action(u32 act)
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
-                                     int src_reg, int ctx_off,
-                                     struct bpf_insn *insn_buf,
-                                     struct bpf_prog *prog)
+static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+                                       int src_reg, int ctx_off,
+                                       struct bpf_insn *insn_buf,
+                                       struct bpf_prog *prog)
 {
        struct bpf_insn *insn = insn_buf;
 
@@ -2556,7 +2651,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
        case offsetof(struct __sk_buff, ifindex):
                BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
                                      dst_reg, src_reg,
                                      offsetof(struct sk_buff, dev));
                *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
@@ -2597,7 +2692,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                                          dst_reg, src_reg, insn);
 
        case offsetof(struct __sk_buff, cb[0]) ...
-               offsetof(struct __sk_buff, cb[4]):
+            offsetof(struct __sk_buff, cb[4]):
                BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
 
                prog->cb_access = 1;
@@ -2621,7 +2716,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                break;
 
        case offsetof(struct __sk_buff, data):
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
                                      dst_reg, src_reg,
                                      offsetof(struct sk_buff, data));
                break;
@@ -2630,8 +2725,8 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                ctx_off -= offsetof(struct __sk_buff, data_end);
                ctx_off += offsetof(struct sk_buff, cb);
                ctx_off += offsetof(struct bpf_skb_data_end, data_end);
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
-                                     dst_reg, src_reg, ctx_off);
+               *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
+                                     ctx_off);
                break;
 
        case offsetof(struct __sk_buff, tc_index):
@@ -2657,6 +2752,31 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
        return insn - insn_buf;
 }
 
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+                                        int src_reg, int ctx_off,
+                                        struct bpf_insn *insn_buf,
+                                        struct bpf_prog *prog)
+{
+       struct bpf_insn *insn = insn_buf;
+
+       switch (ctx_off) {
+       case offsetof(struct __sk_buff, ifindex):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
+                                     dst_reg, src_reg,
+                                     offsetof(struct sk_buff, dev));
+               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+                                     offsetof(struct net_device, ifindex));
+               break;
+       default:
+               return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
+                                                   ctx_off, insn_buf, prog);
+       }
+
+       return insn - insn_buf;
+}
+
 static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                                  int src_reg, int ctx_off,
                                  struct bpf_insn *insn_buf,
@@ -2666,12 +2786,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 
        switch (ctx_off) {
        case offsetof(struct xdp_md, data):
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)),
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
                                      dst_reg, src_reg,
                                      offsetof(struct xdp_buff, data));
                break;
        case offsetof(struct xdp_md, data_end):
-               *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)),
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
                                      dst_reg, src_reg,
                                      offsetof(struct xdp_buff, data_end));
                break;
@@ -2683,13 +2803,13 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 static const struct bpf_verifier_ops sk_filter_ops = {
        .get_func_proto         = sk_filter_func_proto,
        .is_valid_access        = sk_filter_is_valid_access,
-       .convert_ctx_access     = bpf_net_convert_ctx_access,
+       .convert_ctx_access     = sk_filter_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops tc_cls_act_ops = {
        .get_func_proto         = tc_cls_act_func_proto,
        .is_valid_access        = tc_cls_act_is_valid_access,
-       .convert_ctx_access     = bpf_net_convert_ctx_access,
+       .convert_ctx_access     = tc_cls_act_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops xdp_ops = {
index 91028ae..1a7b80f 100644
@@ -118,13 +118,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
        struct flow_dissector_key_addrs *key_addrs;
        struct flow_dissector_key_ports *key_ports;
        struct flow_dissector_key_tags *key_tags;
+       struct flow_dissector_key_vlan *key_vlan;
        struct flow_dissector_key_keyid *key_keyid;
+       bool skip_vlan = false;
        u8 ip_proto = 0;
        bool ret = false;
 
        if (!data) {
                data = skb->data;
-               proto = skb->protocol;
+               proto = skb_vlan_tag_present(skb) ?
+                        skb->vlan_proto : skb->protocol;
                nhoff = skb_network_offset(skb);
                hlen = skb_headlen(skb);
        }
@@ -243,23 +246,45 @@ ipv6:
        case htons(ETH_P_8021AD):
        case htons(ETH_P_8021Q): {
                const struct vlan_hdr *vlan;
-               struct vlan_hdr _vlan;
 
-               vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
-               if (!vlan)
-                       goto out_bad;
+               if (skb_vlan_tag_present(skb))
+                       proto = skb->protocol;
+
+               if (!skb_vlan_tag_present(skb) ||
+                   proto == cpu_to_be16(ETH_P_8021Q) ||
+                   proto == cpu_to_be16(ETH_P_8021AD)) {
+                       struct vlan_hdr _vlan;
+
+                       vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
+                                                   data, hlen, &_vlan);
+                       if (!vlan)
+                               goto out_bad;
+                       proto = vlan->h_vlan_encapsulated_proto;
+                       nhoff += sizeof(*vlan);
+                       if (skip_vlan)
+                               goto again;
+               }
 
+               skip_vlan = true;
                if (dissector_uses_key(flow_dissector,
-                                      FLOW_DISSECTOR_KEY_VLANID)) {
-                       key_tags = skb_flow_dissector_target(flow_dissector,
-                                                            FLOW_DISSECTOR_KEY_VLANID,
+                                      FLOW_DISSECTOR_KEY_VLAN)) {
+                       key_vlan = skb_flow_dissector_target(flow_dissector,
+                                                            FLOW_DISSECTOR_KEY_VLAN,
                                                             target_container);
 
-                       key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+                       if (skb_vlan_tag_present(skb)) {
+                               key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
+                               key_vlan->vlan_priority =
+                                       (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+                       } else {
+                               key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
+                                       VLAN_VID_MASK;
+                               key_vlan->vlan_priority =
+                                       (ntohs(vlan->h_vlan_TCI) &
+                                        VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+                       }
                }
 
-               proto = vlan->h_vlan_encapsulated_proto;
-               nhoff += sizeof(*vlan);
                goto again;
        }
        case htons(ETH_P_PPP_SES): {
@@ -725,11 +750,13 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
 void __skb_get_hash(struct sk_buff *skb)
 {
        struct flow_keys keys;
+       u32 hash;
 
        __flow_hash_secret_init();
 
-       __skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
-                         flow_keys_have_l4(&keys));
+       hash = ___skb_get_hash(skb, &keys, hashrnd);
+
+       __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
@@ -917,8 +944,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
                .offset = offsetof(struct flow_keys, ports),
        },
        {
-               .key_id = FLOW_DISSECTOR_KEY_VLANID,
-               .offset = offsetof(struct flow_keys, tags),
+               .key_id = FLOW_DISSECTOR_KEY_VLAN,
+               .offset = offsetof(struct flow_keys, vlan),
        },
        {
                .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
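
FLOW_DISSECTOR_KEY_VLANID and the generic tags key give way to a
dedicated FLOW_DISSECTOR_KEY_VLAN above, carrying both fields the
dissector now extracts. The new key is assumed to look roughly like:

	/* sketch; see flow_dissector_key_vlan in the same series */
	struct flow_dissector_key_vlan {
		u16	vlan_id:12,
			vlan_priority:3;
		u16	padding;
	};

Note the dissector now prefers the in-skb tag (skb->vlan_proto plus
skb_vlan_tag_get_id()/skb_vlan_tag_get_prio()) when one is present, and
only parses an in-payload VLAN header otherwise; skip_vlan limits the
extracted key to the outermost tag.
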
index 669ecc9..e5f84c2 100644
@@ -251,6 +251,41 @@ drop:
 }
 EXPORT_SYMBOL(lwtunnel_output);
 
+int lwtunnel_xmit(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       const struct lwtunnel_encap_ops *ops;
+       struct lwtunnel_state *lwtstate;
+       int ret = -EINVAL;
+
+       if (!dst)
+               goto drop;
+
+       lwtstate = dst->lwtstate;
+
+       if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+           lwtstate->type > LWTUNNEL_ENCAP_MAX)
+               return 0;
+
+       ret = -EOPNOTSUPP;
+       rcu_read_lock();
+       ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+       if (likely(ops && ops->xmit))
+               ret = ops->xmit(skb);
+       rcu_read_unlock();
+
+       if (ret == -EOPNOTSUPP)
+               goto drop;
+
+       return ret;
+
+drop:
+       kfree_skb(skb);
+
+       return ret;
+}
+EXPORT_SYMBOL(lwtunnel_xmit);
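
A hedged sketch of how a caller in the L3 output path might use the new
hook; lwtunnel_xmit_redirect() and the LWTUNNEL_XMIT_* result names are
assumptions based on the rest of this series, not shown here:

	static int example_finish_output2(struct net *net, struct sock *sk,
					  struct sk_buff *skb)
	{
		struct dst_entry *dst = skb_dst(skb);

		if (lwtunnel_xmit_redirect(dst->lwtstate)) {
			int res = lwtunnel_xmit(skb);

			/* the encap consumed or dropped the skb */
			if (res < 0 || res == LWTUNNEL_XMIT_DONE)
				return res;
		}

		/* ... otherwise fall through to neighbour output ... */
		return 0;
	}
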
+
 int lwtunnel_input(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb_dst(skb);
index 1fe5816..42bdda0 100644
@@ -215,31 +215,29 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
  */
 int peernet2id_alloc(struct net *net, struct net *peer)
 {
-       unsigned long flags;
        bool alloc;
        int id;
 
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
        alloc = atomic_read(&peer->count) == 0 ? false : true;
        id = __peernet2id_alloc(net, peer, &alloc);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
        if (alloc && id >= 0)
                rtnl_net_notifyid(net, RTM_NEWNSID, id);
        return id;
 }
-EXPORT_SYMBOL(peernet2id_alloc);
 
 /* This function returns, if assigned, the id of a peer netns. */
 int peernet2id(struct net *net, struct net *peer)
 {
-       unsigned long flags;
        int id;
 
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
        id = __peernet2id(net, peer);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
        return id;
 }
+EXPORT_SYMBOL(peernet2id);
 
 /* This function returns true is the peer netns has an id assigned into the
  * current netns.
@@ -251,18 +249,17 @@ bool peernet_has_id(struct net *net, struct net *peer)
 
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
-       unsigned long flags;
        struct net *peer;
 
        if (id < 0)
                return NULL;
 
        rcu_read_lock();
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
                get_net(peer);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();
 
        return peer;
@@ -406,17 +403,17 @@ static void cleanup_net(struct work_struct *work)
                for_each_net(tmp) {
                        int id;
 
-                       spin_lock_irq(&tmp->nsid_lock);
+                       spin_lock_bh(&tmp->nsid_lock);
                        id = __peernet2id(tmp, net);
                        if (id >= 0)
                                idr_remove(&tmp->netns_ids, id);
-                       spin_unlock_irq(&tmp->nsid_lock);
+                       spin_unlock_bh(&tmp->nsid_lock);
                        if (id >= 0)
                                rtnl_net_notifyid(tmp, RTM_DELNSID, id);
                }
-               spin_lock_irq(&net->nsid_lock);
+               spin_lock_bh(&net->nsid_lock);
                idr_destroy(&net->netns_ids);
-               spin_unlock_irq(&net->nsid_lock);
+               spin_unlock_bh(&net->nsid_lock);
 
        }
        rtnl_unlock();
@@ -533,7 +530,7 @@ static struct pernet_operations __net_initdata net_ns_ops = {
        .exit = net_ns_net_exit,
 };
 
-static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
        [NETNSA_NSID]           = { .type = NLA_S32 },
        [NETNSA_PID]            = { .type = NLA_U32 },
@@ -544,7 +541,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
-       unsigned long flags;
        struct net *peer;
        int nsid, err;
 
@@ -565,15 +561,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
        if (IS_ERR(peer))
                return PTR_ERR(peer);
 
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
        if (__peernet2id(net, peer) >= 0) {
-               spin_unlock_irqrestore(&net->nsid_lock, flags);
+               spin_unlock_bh(&net->nsid_lock);
                err = -EEXIST;
                goto out;
        }
 
        err = alloc_netid(net, peer, nsid);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
        if (err >= 0) {
                rtnl_net_notifyid(net, RTM_NEWNSID, err);
                err = 0;
@@ -695,11 +691,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
                .idx = 0,
                .s_idx = cb->args[0],
        };
-       unsigned long flags;
 
-       spin_lock_irqsave(&net->nsid_lock, flags);
+       spin_lock_bh(&net->nsid_lock);
        idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
-       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       spin_unlock_bh(&net->nsid_lock);
 
        cb->args[0] = net_cb.idx;
        return skb->len;
index 189cc78..937e459 100644
@@ -704,6 +704,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
                        } else if (i == RTAX_FEATURES - 1) {
                                u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
 
+                               if (!user_features)
+                                       continue;
                                BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
                                if (nla_put_u32(skb, i + 1, user_features))
                                        goto nla_put_failure;
@@ -3066,7 +3068,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
        seq = cb->nlh->nlmsg_seq;
 
        list_for_each_entry(ha, &list->list, list) {
-               if (*idx < cb->args[0])
+               if (*idx < cb->args[2])
                        goto skip;
 
                err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
@@ -3093,19 +3095,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
                      struct netlink_callback *cb,
                      struct net_device *dev,
                      struct net_device *filter_dev,
-                     int idx)
+                     int *idx)
 {
        int err;
 
        netif_addr_lock_bh(dev);
-       err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc);
+       err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
        if (err)
                goto out;
-       nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
+       nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
 out:
        netif_addr_unlock_bh(dev);
-       cb->args[1] = err;
-       return idx;
+       return err;
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_dump);
 
@@ -3118,9 +3119,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
        const struct net_device_ops *cops = NULL;
        struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
        struct net *net = sock_net(skb->sk);
+       struct hlist_head *head;
        int brport_idx = 0;
        int br_idx = 0;
-       int idx = 0;
+       int h, s_h;
+       int idx = 0, s_idx;
+       int err = 0;
+       int fidx = 0;
 
        if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
                        ifla_policy) == 0) {
@@ -3138,49 +3143,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
                ops = br_dev->netdev_ops;
        }
 
-       cb->args[1] = 0;
-       for_each_netdev(net, dev) {
-               if (brport_idx && (dev->ifindex != brport_idx))
-                       continue;
+       s_h = cb->args[0];
+       s_idx = cb->args[1];
 
-               if (!br_idx) { /* user did not specify a specific bridge */
-                       if (dev->priv_flags & IFF_BRIDGE_PORT) {
-                               br_dev = netdev_master_upper_dev_get(dev);
-                               cops = br_dev->netdev_ops;
-                       }
+       for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+               idx = 0;
+               head = &net->dev_index_head[h];
+               hlist_for_each_entry(dev, head, index_hlist) {
 
-               } else {
-                       if (dev != br_dev &&
-                           !(dev->priv_flags & IFF_BRIDGE_PORT))
+                       if (brport_idx && (dev->ifindex != brport_idx))
                                continue;
 
-                       if (br_dev != netdev_master_upper_dev_get(dev) &&
-                           !(dev->priv_flags & IFF_EBRIDGE))
-                               continue;
+                       if (!br_idx) { /* user did not specify a specific bridge */
+                               if (dev->priv_flags & IFF_BRIDGE_PORT) {
+                                       br_dev = netdev_master_upper_dev_get(dev);
+                                       cops = br_dev->netdev_ops;
+                               }
+                       } else {
+                               if (dev != br_dev &&
+                                   !(dev->priv_flags & IFF_BRIDGE_PORT))
+                                       continue;
 
-                       cops = ops;
-               }
+                               if (br_dev != netdev_master_upper_dev_get(dev) &&
+                                   !(dev->priv_flags & IFF_EBRIDGE))
+                                       continue;
+                               cops = ops;
+                       }
 
-               if (dev->priv_flags & IFF_BRIDGE_PORT) {
-                       if (cops && cops->ndo_fdb_dump)
-                               idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
-                                                        idx);
-               }
-               if (cb->args[1] == -EMSGSIZE)
-                       break;
+                       if (idx < s_idx)
+                               goto cont;
 
-               if (dev->netdev_ops->ndo_fdb_dump)
-                       idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
-                                                           idx);
-               else
-                       idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
-               if (cb->args[1] == -EMSGSIZE)
-                       break;
+                       if (dev->priv_flags & IFF_BRIDGE_PORT) {
+                               if (cops && cops->ndo_fdb_dump) {
+                                       err = cops->ndo_fdb_dump(skb, cb,
+                                                               br_dev, dev,
+                                                               &fidx);
+                                       if (err == -EMSGSIZE)
+                                               goto out;
+                               }
+                       }
+
+                       if (dev->netdev_ops->ndo_fdb_dump)
+                               err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
+                                                                   dev, NULL,
+                                                                   &fidx);
+                       else
+                               err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
+                                                       &fidx);
+                       if (err == -EMSGSIZE)
+                               goto out;
 
-               cops = NULL;
+                       cops = NULL;
+
+                       /* reset fdb offset to 0 for rest of the interfaces */
+                       cb->args[2] = 0;
+                       fidx = 0;
+cont:
+                       idx++;
+               }
        }
 
-       cb->args[0] = idx;
+out:
+       cb->args[0] = h;
+       cb->args[1] = idx;
+       cb->args[2] = fidx;
+
        return skb->len;
 }
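
The rework above also changes the .ndo_fdb_dump contract: callbacks now
return an error code and advance *idx, with the per-device fdb offset
kept in cb->args[2]. A hypothetical driver-side implementation under the
new convention (a software-only driver can simply delegate):

	static int example_ndo_fdb_dump(struct sk_buff *skb,
					struct netlink_callback *cb,
					struct net_device *dev,
					struct net_device *filter_dev,
					int *idx)
	{
		/* entries below cb->args[2] were sent on an earlier
		 * pass; returning -EMSGSIZE resumes the walk here
		 */
		return ndo_dflt_fdb_dump(skb, cb, dev, filter_dev, idx);
	}
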
 
@@ -3642,10 +3669,6 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
-       [IFLA_STATS_LINK_64]    = { .len = sizeof(struct rtnl_link_stats64) },
-};
-
 static size_t if_nlmsg_stats_size(const struct net_device *dev,
                                  u32 filter_mask)
 {
index 3864b4b..1e329d4 100644
@@ -2444,6 +2444,25 @@ void skb_queue_purge(struct sk_buff_head *list)
 }
 EXPORT_SYMBOL(skb_queue_purge);
 
+/**
+ *     skb_rbtree_purge - empty a skb rbtree
+ *     @root: root of the rbtree to empty
+ *
+ *     Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ *     the list and one reference dropped. This function does not take
+ *     any lock. Synchronization should be handled by the caller (e.g., TCP
+ *     out-of-order queue is protected by the socket lock).
+ */
+void skb_rbtree_purge(struct rb_root *root)
+{
+       struct sk_buff *skb, *next;
+
+       rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
+               kfree_skb(skb);
+
+       *root = RB_ROOT;
+}
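
A hypothetical caller, matching the TCP out-of-order queue mentioned in
the kernel-doc above (converting tp->out_of_order_queue to an rb_root
happens elsewhere in the same series):

	static void example_purge_ofo_queue(struct tcp_sock *tp)
	{
		/* socket lock held by the caller, as documented */
		skb_rbtree_purge(&tp->out_of_order_queue);
	}
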
+
 /**
  *     skb_queue_head - queue a buffer at the list head
  *     @list: list to use
index 25dab8b..51a7304 100644
@@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
 #endif
 }
 
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
-{
-       unsigned long nulls1, nulls2;
-
-       nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
-       nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
-       if (nulls1 > nulls2)
-               swap(nulls1, nulls2);
-
-       if (nulls1 != 0)
-               memset((char *)sk, 0, nulls1);
-       memset((char *)sk + nulls1 + sizeof(void *), 0,
-              nulls2 - nulls1 - sizeof(void *));
-       memset((char *)sk + nulls2 + sizeof(void *), 0,
-              size - nulls2 - sizeof(void *));
-}
-EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
-
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                int family)
 {
@@ -1344,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
                if (!sk)
                        return sk;
-               if (priority & __GFP_ZERO) {
-                       if (prot->clear_sk)
-                               prot->clear_sk(sk, prot->obj_size);
-                       else
-                               sk_prot_clear_nulls(sk, prot->obj_size);
-               }
+               if (priority & __GFP_ZERO)
+                       sk_prot_clear_nulls(sk, prot->obj_size);
        } else
                sk = kmalloc(prot->obj_size, priority);
 
index ff7736f..96e47c5 100644
@@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA
 config NET_DSA_TAG_TRAILER
        bool
 
+config NET_DSA_TAG_QCA
+       bool
+
 endif
index 8af4ded..a3380ed 100644
@@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
 dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
index 7e68bc6..66e31ac 100644
@@ -53,6 +53,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 #endif
 #ifdef CONFIG_NET_DSA_TAG_BRCM
        [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
+#ifdef CONFIG_NET_DSA_TAG_QCA
+       [DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
 #endif
        [DSA_TAG_PROTO_NONE] = &none_ops,
 };
@@ -61,27 +64,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 static DEFINE_MUTEX(dsa_switch_drivers_mutex);
 static LIST_HEAD(dsa_switch_drivers);
 
-void register_switch_driver(struct dsa_switch_driver *drv)
+void register_switch_driver(struct dsa_switch_ops *ops)
 {
        mutex_lock(&dsa_switch_drivers_mutex);
-       list_add_tail(&drv->list, &dsa_switch_drivers);
+       list_add_tail(&ops->list, &dsa_switch_drivers);
        mutex_unlock(&dsa_switch_drivers_mutex);
 }
 EXPORT_SYMBOL_GPL(register_switch_driver);
 
-void unregister_switch_driver(struct dsa_switch_driver *drv)
+void unregister_switch_driver(struct dsa_switch_ops *ops)
 {
        mutex_lock(&dsa_switch_drivers_mutex);
-       list_del_init(&drv->list);
+       list_del_init(&ops->list);
        mutex_unlock(&dsa_switch_drivers_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_switch_driver);
 
-static struct dsa_switch_driver *
+static struct dsa_switch_ops *
 dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
                 const char **_name, void **priv)
 {
-       struct dsa_switch_driver *ret;
+       struct dsa_switch_ops *ret;
        struct list_head *list;
        const char *name;
 
@@ -90,13 +93,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
 
        mutex_lock(&dsa_switch_drivers_mutex);
        list_for_each(list, &dsa_switch_drivers) {
-               struct dsa_switch_driver *drv;
+               struct dsa_switch_ops *ops;
 
-               drv = list_entry(list, struct dsa_switch_driver, list);
+               ops = list_entry(list, struct dsa_switch_ops, list);
 
-               name = drv->probe(parent, host_dev, sw_addr, priv);
+               name = ops->probe(parent, host_dev, sw_addr, priv);
                if (name != NULL) {
-                       ret = drv;
+                       ret = ops;
                        break;
                }
        }
@@ -117,7 +120,7 @@ static ssize_t temp1_input_show(struct device *dev,
        struct dsa_switch *ds = dev_get_drvdata(dev);
        int temp, ret;
 
-       ret = ds->drv->get_temp(ds, &temp);
+       ret = ds->ops->get_temp(ds, &temp);
        if (ret < 0)
                return ret;
 
@@ -131,7 +134,7 @@ static ssize_t temp1_max_show(struct device *dev,
        struct dsa_switch *ds = dev_get_drvdata(dev);
        int temp, ret;
 
-       ret = ds->drv->get_temp_limit(ds, &temp);
+       ret = ds->ops->get_temp_limit(ds, &temp);
        if (ret < 0)
                return ret;
 
@@ -149,7 +152,7 @@ static ssize_t temp1_max_store(struct device *dev,
        if (ret < 0)
                return ret;
 
-       ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
+       ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
        if (ret < 0)
                return ret;
 
@@ -164,7 +167,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev,
        bool alarm;
        int ret;
 
-       ret = ds->drv->get_temp_alarm(ds, &alarm);
+       ret = ds->ops->get_temp_alarm(ds, &alarm);
        if (ret < 0)
                return ret;
 
@@ -184,15 +187,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
 {
        struct device *dev = container_of(kobj, struct device, kobj);
        struct dsa_switch *ds = dev_get_drvdata(dev);
-       struct dsa_switch_driver *drv = ds->drv;
+       struct dsa_switch_ops *ops = ds->ops;
        umode_t mode = attr->mode;
 
        if (index == 1) {
-               if (!drv->get_temp_limit)
+               if (!ops->get_temp_limit)
                        mode = 0;
-               else if (!drv->set_temp_limit)
+               else if (!ops->set_temp_limit)
                        mode &= ~S_IWUSR;
-       } else if (index == 2 && !drv->get_temp_alarm) {
+       } else if (index == 2 && !ops->get_temp_alarm) {
                mode = 0;
        }
        return mode;
@@ -228,8 +231,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
 
                genphy_config_init(phydev);
                genphy_read_status(phydev);
-               if (ds->drv->adjust_link)
-                       ds->drv->adjust_link(ds, port, phydev);
+               if (ds->ops->adjust_link)
+                       ds->ops->adjust_link(ds, port, phydev);
        }
 
        return 0;
@@ -303,7 +306,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds)
 
 static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 {
-       struct dsa_switch_driver *drv = ds->drv;
+       struct dsa_switch_ops *ops = ds->ops;
        struct dsa_switch_tree *dst = ds->dst;
        struct dsa_chip_data *cd = ds->cd;
        bool valid_name_found = false;
@@ -354,7 +357,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
         * switch.
         */
        if (dst->cpu_switch == index) {
-               dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol);
+               enum dsa_tag_protocol tag_protocol;
+
+               tag_protocol = ops->get_tag_protocol(ds);
+               dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
                if (IS_ERR(dst->tag_ops)) {
                        ret = PTR_ERR(dst->tag_ops);
                        goto out;
@@ -368,15 +374,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
        /*
         * Do basic register setup.
         */
-       ret = drv->setup(ds);
+       ret = ops->setup(ds);
        if (ret < 0)
                goto out;
 
-       ret = drv->set_addr(ds, dst->master_netdev->dev_addr);
+       ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
        if (ret < 0)
                goto out;
 
-       if (!ds->slave_mii_bus && drv->phy_read) {
+       if (!ds->slave_mii_bus && ops->phy_read) {
                ds->slave_mii_bus = devm_mdiobus_alloc(parent);
                if (!ds->slave_mii_bus) {
                        ret = -ENOMEM;
@@ -423,7 +429,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
         * register with hardware monitoring subsystem.
         * Treat registration error as non-fatal and ignore it.
         */
-       if (drv->get_temp) {
+       if (ops->get_temp) {
                const char *netname = netdev_name(dst->master_netdev);
                char hname[IFNAMSIZ + 1];
                int i, j;
@@ -454,7 +460,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
                 struct device *parent, struct device *host_dev)
 {
        struct dsa_chip_data *cd = dst->pd->chip + index;
-       struct dsa_switch_driver *drv;
+       struct dsa_switch_ops *ops;
        struct dsa_switch *ds;
        int ret;
        const char *name;
@@ -463,8 +469,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
        /*
         * Probe for switch model.
         */
-       drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
-       if (drv == NULL) {
+       ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
+       if (!ops) {
                netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
                           index);
                return ERR_PTR(-EINVAL);
@@ -483,7 +489,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
        ds->dst = dst;
        ds->index = index;
        ds->cd = cd;
-       ds->drv = drv;
+       ds->ops = ops;
        ds->priv = priv;
        ds->dev = parent;
 
@@ -538,12 +544,12 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
                ds->dsa_port_mask |= ~(1 << port);
        }
 
-       if (ds->slave_mii_bus && ds->drv->phy_read)
+       if (ds->slave_mii_bus && ds->ops->phy_read)
                mdiobus_unregister(ds->slave_mii_bus);
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int dsa_switch_suspend(struct dsa_switch *ds)
+int dsa_switch_suspend(struct dsa_switch *ds)
 {
        int i, ret = 0;
 
@@ -557,18 +563,19 @@ static int dsa_switch_suspend(struct dsa_switch *ds)
                        return ret;
        }
 
-       if (ds->drv->suspend)
-               ret = ds->drv->suspend(ds);
+       if (ds->ops->suspend)
+               ret = ds->ops->suspend(ds);
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
 
-static int dsa_switch_resume(struct dsa_switch *ds)
+int dsa_switch_resume(struct dsa_switch *ds)
 {
        int i, ret = 0;
 
-       if (ds->drv->resume)
-               ret = ds->drv->resume(ds);
+       if (ds->ops->resume)
+               ret = ds->ops->resume(ds);
 
        if (ret)
                return ret;
@@ -585,6 +592,7 @@ static int dsa_switch_resume(struct dsa_switch *ds)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
 #endif
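
Since dsa_switch_suspend() and dsa_switch_resume() are now exported, a
switch driver can hook them into its own PM ops. A hedged sketch with
illustrative driver names:

	#ifdef CONFIG_PM_SLEEP
	static int example_drv_suspend(struct device *dev)
	{
		struct example_priv *priv = dev_get_drvdata(dev);

		return dsa_switch_suspend(priv->ds);
	}

	static int example_drv_resume(struct device *dev)
	{
		struct example_priv *priv = dev_get_drvdata(dev);

		return dsa_switch_resume(priv->ds);
	}
	#endif

	static SIMPLE_DEV_PM_OPS(example_drv_pm_ops,
				 example_drv_suspend, example_drv_resume);
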
 
 /* platform driver init and cleanup *****************************************/
@@ -1086,7 +1094,6 @@ static int dsa_resume(struct device *d)
 static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
 
 static const struct of_device_id dsa_of_match_table[] = {
-       { .compatible = "brcm,bcm7445-switch-v4.0" },
        { .compatible = "marvell,dsa", },
        {}
 };
index f30bad9..8278385 100644
@@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
        int err;
 
        /* Initialize ds->phys_mii_mask before registering the slave MDIO bus
-        * driver and before drv->setup() has run, since the switch drivers and
+        * driver and before ops->setup() has run, since the switch drivers and
         * the slave MDIO bus driver rely on these values for probing PHY
         * devices or not
         */
        ds->phys_mii_mask = ds->enabled_port_mask;
 
-       err = ds->drv->setup(ds);
+       err = ds->ops->setup(ds);
        if (err < 0)
                return err;
 
-       err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+       err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
        if (err < 0)
                return err;
 
-       err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
+       err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
        if (err < 0)
                return err;
 
-       if (!ds->slave_mii_bus && ds->drv->phy_read) {
+       if (!ds->slave_mii_bus && ds->ops->phy_read) {
                ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
                if (!ds->slave_mii_bus)
                        return -ENOMEM;
@@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
                dsa_user_port_unapply(port, index, ds);
        }
 
-       if (ds->slave_mii_bus && ds->drv->phy_read)
+       if (ds->slave_mii_bus && ds->ops->phy_read)
                mdiobus_unregister(ds->slave_mii_bus);
 }
 
@@ -443,6 +443,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index,
                         struct dsa_switch_tree *dst,
                         struct dsa_switch *ds)
 {
+       enum dsa_tag_protocol tag_protocol;
        struct net_device *ethernet_dev;
        struct device_node *ethernet;
 
@@ -465,7 +466,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index,
                dst->cpu_port = index;
        }
 
-       dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol);
+       tag_protocol = ds->ops->get_tag_protocol(ds);
+       dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
        if (IS_ERR(dst->tag_ops)) {
                dev_warn(ds->dev, "No tagger for this switch\n");
                return PTR_ERR(dst->tag_ops);
@@ -541,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
 
                ds->ports[reg].dn = port;
 
-               /* Initialize enabled_port_mask now for drv->setup()
+               /* Initialize enabled_port_mask now for ops->setup()
                 * to have access to a correct value, just like what
                 * net/dsa/dsa.c::dsa_switch_setup_one does.
                 */
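
The tag protocol is now queried through a ds->ops->get_tag_protocol() callback
instead of being read from a static structure field, so a driver can choose the
protocol at probe time. A minimal sketch of the callback (driver names are
hypothetical; the enum value matches the tagger added below):

static enum dsa_tag_protocol example_get_tag_protocol(struct dsa_switch *ds)
{
	/* This hypothetical switch always speaks the 2-byte QCA tag */
	return DSA_TAG_PROTO_QCA;
}

static struct dsa_switch_ops example_switch_ops = {
	.get_tag_protocol	= example_get_tag_protocol,
	/* .setup, .phy_read, .phy_write, ... */
};
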
index 00077a9..6cfd738 100644
@@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops;
 /* tag_brcm.c */
 extern const struct dsa_device_ops brcm_netdev_ops;
 
+/* tag_qca.c */
+extern const struct dsa_device_ops qca_netdev_ops;
 
 #endif
index fc91967..9ecbe78 100644
@@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
        struct dsa_switch *ds = bus->priv;
 
        if (ds->phys_mii_mask & (1 << addr))
-               return ds->drv->phy_read(ds, addr, reg);
+               return ds->ops->phy_read(ds, addr, reg);
 
        return 0xffff;
 }
@@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
        struct dsa_switch *ds = bus->priv;
 
        if (ds->phys_mii_mask & (1 << addr))
-               return ds->drv->phy_write(ds, addr, reg, val);
+               return ds->ops->phy_write(ds, addr, reg, val);
 
        return 0;
 }
@@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev)
                        goto clear_allmulti;
        }
 
-       if (ds->drv->port_enable) {
-               err = ds->drv->port_enable(ds, p->port, p->phy);
+       if (ds->ops->port_enable) {
+               err = ds->ops->port_enable(ds, p->port, p->phy);
                if (err)
                        goto clear_promisc;
        }
 
-       if (ds->drv->port_stp_state_set)
-               ds->drv->port_stp_state_set(ds, p->port, stp_state);
+       if (ds->ops->port_stp_state_set)
+               ds->ops->port_stp_state_set(ds, p->port, stp_state);
 
        if (p->phy)
                phy_start(p->phy);
@@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev)
        if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
                dev_uc_del(master, dev->dev_addr);
 
-       if (ds->drv->port_disable)
-               ds->drv->port_disable(ds, p->port, p->phy);
+       if (ds->ops->port_disable)
+               ds->ops->port_disable(ds, p->port, p->phy);
 
-       if (ds->drv->port_stp_state_set)
-               ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
+       if (ds->ops->port_stp_state_set)
+               ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
 
        return 0;
 }
@@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
        struct dsa_switch *ds = p->parent;
 
        if (switchdev_trans_ph_prepare(trans)) {
-               if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
+               if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
                        return -EOPNOTSUPP;
 
-               return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
+               return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans);
        }
 
-       ds->drv->port_vlan_add(ds, p->port, vlan, trans);
+       ds->ops->port_vlan_add(ds, p->port, vlan, trans);
 
        return 0;
 }
@@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev,
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (!ds->drv->port_vlan_del)
+       if (!ds->ops->port_vlan_del)
                return -EOPNOTSUPP;
 
-       return ds->drv->port_vlan_del(ds, p->port, vlan);
+       return ds->ops->port_vlan_del(ds, p->port, vlan);
 }
 
 static int dsa_slave_port_vlan_dump(struct net_device *dev,
@@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->port_vlan_dump)
-               return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
+       if (ds->ops->port_vlan_dump)
+               return ds->ops->port_vlan_dump(ds, p->port, vlan, cb);
 
        return -EOPNOTSUPP;
 }
@@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev,
        struct dsa_switch *ds = p->parent;
 
        if (switchdev_trans_ph_prepare(trans)) {
-               if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add)
+               if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
                        return -EOPNOTSUPP;
 
-               return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans);
+               return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans);
        }
 
-       ds->drv->port_fdb_add(ds, p->port, fdb, trans);
+       ds->ops->port_fdb_add(ds, p->port, fdb, trans);
 
        return 0;
 }
@@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev,
        struct dsa_switch *ds = p->parent;
        int ret = -EOPNOTSUPP;
 
-       if (ds->drv->port_fdb_del)
-               ret = ds->drv->port_fdb_del(ds, p->port, fdb);
+       if (ds->ops->port_fdb_del)
+               ret = ds->ops->port_fdb_del(ds, p->port, fdb);
 
        return ret;
 }
@@ -284,8 +284,52 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev,
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->port_fdb_dump)
-               return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
+       if (ds->ops->port_fdb_dump)
+               return ds->ops->port_fdb_dump(ds, p->port, fdb, cb);
+
+       return -EOPNOTSUPP;
+}
+
+static int dsa_slave_port_mdb_add(struct net_device *dev,
+                                 const struct switchdev_obj_port_mdb *mdb,
+                                 struct switchdev_trans *trans)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+
+       if (switchdev_trans_ph_prepare(trans)) {
+               if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+                       return -EOPNOTSUPP;
+
+               return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans);
+       }
+
+       ds->ops->port_mdb_add(ds, p->port, mdb, trans);
+
+       return 0;
+}
+
+static int dsa_slave_port_mdb_del(struct net_device *dev,
+                                 const struct switchdev_obj_port_mdb *mdb)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+
+       if (ds->ops->port_mdb_del)
+               return ds->ops->port_mdb_del(ds, p->port, mdb);
+
+       return -EOPNOTSUPP;
+}
+
+static int dsa_slave_port_mdb_dump(struct net_device *dev,
+                                  struct switchdev_obj_port_mdb *mdb,
+                                  switchdev_obj_dump_cb_t *cb)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       struct dsa_switch *ds = p->parent;
+
+       if (ds->ops->port_mdb_dump)
+               return ds->ops->port_mdb_dump(ds, p->port, mdb, cb);
 
        return -EOPNOTSUPP;
 }
@@ -308,9 +352,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev,
        struct dsa_switch *ds = p->parent;
 
        if (switchdev_trans_ph_prepare(trans))
-               return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP;
+               return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
 
-       ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state);
+       ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state);
 
        return 0;
 }
@@ -326,8 +370,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev,
        if (switchdev_trans_ph_prepare(trans))
                return 0;
 
-       if (ds->drv->port_vlan_filtering)
-               return ds->drv->port_vlan_filtering(ds, p->port,
+       if (ds->ops->port_vlan_filtering)
+               return ds->ops->port_vlan_filtering(ds, p->port,
                                                    attr->u.vlan_filtering);
 
        return 0;
@@ -365,8 +409,8 @@ static int dsa_slave_ageing_time(struct net_device *dev,
        ds->ports[p->port].ageing_time = ageing_time;
        ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
 
-       if (ds->drv->set_ageing_time)
-               return ds->drv->set_ageing_time(ds, ageing_time);
+       if (ds->ops->set_ageing_time)
+               return ds->ops->set_ageing_time(ds, ageing_time);
 
        return 0;
 }
@@ -412,6 +456,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
                                             SWITCHDEV_OBJ_PORT_FDB(obj),
                                             trans);
                break;
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+               err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+                                            trans);
+               break;
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                err = dsa_slave_port_vlan_add(dev,
                                              SWITCHDEV_OBJ_PORT_VLAN(obj),
@@ -435,6 +483,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
                err = dsa_slave_port_fdb_del(dev,
                                             SWITCHDEV_OBJ_PORT_FDB(obj));
                break;
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+               err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+               break;
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                err = dsa_slave_port_vlan_del(dev,
                                              SWITCHDEV_OBJ_PORT_VLAN(obj));
@@ -459,6 +510,10 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
                                              SWITCHDEV_OBJ_PORT_FDB(obj),
                                              cb);
                break;
+       case SWITCHDEV_OBJ_ID_PORT_MDB:
+               err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+                                             cb);
+               break;
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                err = dsa_slave_port_vlan_dump(dev,
                                               SWITCHDEV_OBJ_PORT_VLAN(obj),
@@ -481,8 +536,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev,
 
        p->bridge_dev = br;
 
-       if (ds->drv->port_bridge_join)
-               ret = ds->drv->port_bridge_join(ds, p->port, br);
+       if (ds->ops->port_bridge_join)
+               ret = ds->ops->port_bridge_join(ds, p->port, br);
 
        return ret == -EOPNOTSUPP ? 0 : ret;
 }
@@ -493,16 +548,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev)
        struct dsa_switch *ds = p->parent;
 
 
-       if (ds->drv->port_bridge_leave)
-               ds->drv->port_bridge_leave(ds, p->port);
+       if (ds->ops->port_bridge_leave)
+               ds->ops->port_bridge_leave(ds, p->port);
 
        p->bridge_dev = NULL;
 
        /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
         * so allow it to be in BR_STATE_FORWARDING to be kept functional
         */
-       if (ds->drv->port_stp_state_set)
-               ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
+       if (ds->ops->port_stp_state_set)
+               ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
 }
 
 static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -605,8 +660,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->get_regs_len)
-               return ds->drv->get_regs_len(ds, p->port);
+       if (ds->ops->get_regs_len)
+               return ds->ops->get_regs_len(ds, p->port);
 
        return -EOPNOTSUPP;
 }
@@ -617,8 +672,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->get_regs)
-               ds->drv->get_regs(ds, p->port, regs, _p);
+       if (ds->ops->get_regs)
+               ds->ops->get_regs(ds, p->port, regs, _p);
 }
 
 static int dsa_slave_nway_reset(struct net_device *dev)
@@ -651,8 +706,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
        if (ds->cd && ds->cd->eeprom_len)
                return ds->cd->eeprom_len;
 
-       if (ds->drv->get_eeprom_len)
-               return ds->drv->get_eeprom_len(ds);
+       if (ds->ops->get_eeprom_len)
+               return ds->ops->get_eeprom_len(ds);
 
        return 0;
 }
@@ -663,8 +718,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->get_eeprom)
-               return ds->drv->get_eeprom(ds, eeprom, data);
+       if (ds->ops->get_eeprom)
+               return ds->ops->get_eeprom(ds, eeprom, data);
 
        return -EOPNOTSUPP;
 }
@@ -675,8 +730,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->set_eeprom)
-               return ds->drv->set_eeprom(ds, eeprom, data);
+       if (ds->ops->set_eeprom)
+               return ds->ops->set_eeprom(ds, eeprom, data);
 
        return -EOPNOTSUPP;
 }
@@ -694,8 +749,8 @@ static void dsa_slave_get_strings(struct net_device *dev,
                strncpy(data + len, "tx_bytes", len);
                strncpy(data + 2 * len, "rx_packets", len);
                strncpy(data + 3 * len, "rx_bytes", len);
-               if (ds->drv->get_strings != NULL)
-                       ds->drv->get_strings(ds, p->port, data + 4 * len);
+               if (ds->ops->get_strings)
+                       ds->ops->get_strings(ds, p->port, data + 4 * len);
        }
 }
 
@@ -714,8 +769,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
                dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
        }
 
-       if (ds->drv->get_ethtool_stats)
-               ds->drv->get_ethtool_stats(ds, cpu_port, data + count);
+       if (ds->ops->get_ethtool_stats)
+               ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
 }
 
 static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
@@ -727,8 +782,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
        if (dst->master_ethtool_ops.get_sset_count)
                count += dst->master_ethtool_ops.get_sset_count(dev, sset);
 
-       if (sset == ETH_SS_STATS && ds->drv->get_sset_count)
-               count += ds->drv->get_sset_count(ds);
+       if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+               count += ds->ops->get_sset_count(ds);
 
        return count;
 }
@@ -755,14 +810,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
                dst->master_ethtool_ops.get_strings(dev, stringset, data);
        }
 
-       if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
+       if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
                ndata = data + mcount * len;
                /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
                 * the output after to prepend our CPU port prefix we
                 * constructed earlier
                 */
-               ds->drv->get_strings(ds, cpu_port, ndata);
-               count = ds->drv->get_sset_count(ds);
+               ds->ops->get_strings(ds, cpu_port, ndata);
+               count = ds->ops->get_sset_count(ds);
                for (i = 0; i < count; i++) {
                        memmove(ndata + (i * len + sizeof(pfx)),
                                ndata + i * len, len - sizeof(pfx));
@@ -782,8 +837,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
        data[1] = dev->stats.tx_bytes;
        data[2] = dev->stats.rx_packets;
        data[3] = dev->stats.rx_bytes;
-       if (ds->drv->get_ethtool_stats != NULL)
-               ds->drv->get_ethtool_stats(ds, p->port, data + 4);
+       if (ds->ops->get_ethtool_stats)
+               ds->ops->get_ethtool_stats(ds, p->port, data + 4);
 }
 
 static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
@@ -795,8 +850,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
                int count;
 
                count = 4;
-               if (ds->drv->get_sset_count != NULL)
-                       count += ds->drv->get_sset_count(ds);
+               if (ds->ops->get_sset_count)
+                       count += ds->ops->get_sset_count(ds);
 
                return count;
        }
@@ -809,8 +864,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
        struct dsa_slave_priv *p = netdev_priv(dev);
        struct dsa_switch *ds = p->parent;
 
-       if (ds->drv->get_wol)
-               ds->drv->get_wol(ds, p->port, w);
+       if (ds->ops->get_wol)
+               ds->ops->get_wol(ds, p->port, w);
 }
 
 static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
@@ -819,8 +874,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
        struct dsa_switch *ds = p->parent;
        int ret = -EOPNOTSUPP;
 
-       if (ds->drv->set_wol)
-               ret = ds->drv->set_wol(ds, p->port, w);
+       if (ds->ops->set_wol)
+               ret = ds->ops->set_wol(ds, p->port, w);
 
        return ret;
 }
@@ -831,10 +886,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
        struct dsa_switch *ds = p->parent;
        int ret;
 
-       if (!ds->drv->set_eee)
+       if (!ds->ops->set_eee)
                return -EOPNOTSUPP;
 
-       ret = ds->drv->set_eee(ds, p->port, p->phy, e);
+       ret = ds->ops->set_eee(ds, p->port, p->phy, e);
        if (ret)
                return ret;
 
@@ -850,10 +905,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
        struct dsa_switch *ds = p->parent;
        int ret;
 
-       if (!ds->drv->get_eee)
+       if (!ds->ops->get_eee)
                return -EOPNOTSUPP;
 
-       ret = ds->drv->get_eee(ds, p->port, e);
+       ret = ds->ops->get_eee(ds, p->port, e);
        if (ret)
                return ret;
 
@@ -988,8 +1043,8 @@ static void dsa_slave_adjust_link(struct net_device *dev)
                p->old_pause = p->phy->pause;
        }
 
-       if (ds->drv->adjust_link && status_changed)
-               ds->drv->adjust_link(ds, p->port, p->phy);
+       if (ds->ops->adjust_link && status_changed)
+               ds->ops->adjust_link(ds, p->port, p->phy);
 
        if (status_changed)
                phy_print_status(p->phy);
@@ -1004,8 +1059,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev,
        if (dev) {
                p = netdev_priv(dev);
                ds = p->parent;
-               if (ds->drv->fixed_link_update)
-                       ds->drv->fixed_link_update(ds, p->port, status);
+               if (ds->ops->fixed_link_update)
+                       ds->ops->fixed_link_update(ds, p->port, status);
        }
 
        return 0;
@@ -1062,8 +1117,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
                phy_dn = port_dn;
        }
 
-       if (ds->drv->get_phy_flags)
-               phy_flags = ds->drv->get_phy_flags(ds, p->port);
+       if (ds->ops->get_phy_flags)
+               phy_flags = ds->ops->get_phy_flags(ds, p->port);
 
        if (phy_dn) {
                int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
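
The new MDB handlers follow the same two-phase switchdev pattern already used
for VLANs and FDB entries: the prepare phase may fail, the commit phase may not.
A sketch of the driver-side callbacks implied by the calls above (the bodies are
hypothetical placeholders):

static int example_port_mdb_prepare(struct dsa_switch *ds, int port,
				    const struct switchdev_obj_port_mdb *mdb,
				    struct switchdev_trans *trans)
{
	/* Check that mdb->addr/mdb->vid fits in the hardware database;
	 * an error here aborts the transaction before anything changes.
	 */
	return 0;
}

static void example_port_mdb_add(struct dsa_switch *ds, int port,
				 const struct switchdev_obj_port_mdb *mdb,
				 struct switchdev_trans *trans)
{
	/* Commit phase: program the entry. No return value, since the
	 * prepare phase already guaranteed this cannot fail.
	 */
}

static int example_port_mdb_del(struct dsa_switch *ds, int port,
				const struct switchdev_obj_port_mdb *mdb)
{
	/* Remove the entry; errors propagate back through switchdev */
	return 0;
}
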
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
new file mode 100644
index 0000000..0c90cac
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include "dsa_priv.h"
+
+#define QCA_HDR_LEN    2
+#define QCA_HDR_VERSION        0x2
+
+#define QCA_HDR_RECV_VERSION_MASK      GENMASK(15, 14)
+#define QCA_HDR_RECV_VERSION_S         14
+#define QCA_HDR_RECV_PRIORITY_MASK     GENMASK(13, 11)
+#define QCA_HDR_RECV_PRIORITY_S                11
+#define QCA_HDR_RECV_TYPE_MASK         GENMASK(10, 6)
+#define QCA_HDR_RECV_TYPE_S            6
+#define QCA_HDR_RECV_FRAME_IS_TAGGED   BIT(3)
+#define QCA_HDR_RECV_SOURCE_PORT_MASK  GENMASK(2, 0)
+
+#define QCA_HDR_XMIT_VERSION_MASK      GENMASK(15, 14)
+#define QCA_HDR_XMIT_VERSION_S         14
+#define QCA_HDR_XMIT_PRIORITY_MASK     GENMASK(13, 11)
+#define QCA_HDR_XMIT_PRIORITY_S                11
+#define QCA_HDR_XMIT_CONTROL_MASK      GENMASK(10, 8)
+#define QCA_HDR_XMIT_CONTROL_S         8
+#define QCA_HDR_XMIT_FROM_CPU          BIT(7)
+#define QCA_HDR_XMIT_DP_BIT_MASK       GENMASK(6, 0)
+
+static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct dsa_slave_priv *p = netdev_priv(dev);
+       u16 *phdr, hdr;
+
+       dev->stats.tx_packets++;
+       dev->stats.tx_bytes += skb->len;
+
+       if (skb_cow_head(skb, 0) < 0)
+               goto out_free;
+
+       skb_push(skb, QCA_HDR_LEN);
+
+       memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
+       phdr = (u16 *)(skb->data + 2 * ETH_ALEN);
+
+       /* Set the version field, and set destination port information */
+       hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
+               QCA_HDR_XMIT_FROM_CPU |
+               BIT(p->port);
+
+       *phdr = htons(hdr);
+
+       return skb;
+
+out_free:
+       kfree_skb(skb);
+       return NULL;
+}
+
+static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+                      struct packet_type *pt, struct net_device *orig_dev)
+{
+       struct dsa_switch_tree *dst = dev->dsa_ptr;
+       struct dsa_switch *ds;
+       u8 ver;
+       int port;
+       __be16 *phdr, hdr;
+
+       if (unlikely(!dst))
+               goto out_drop;
+
+       skb = skb_unshare(skb, GFP_ATOMIC);
+       if (!skb)
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
+               goto out_drop;
+
+       /* The QCA header is added by the switch between the source address
+        * and the Ethertype. At this point, skb->data points to the Ethertype,
+        * so the header should be right before it.
+        */
+       phdr = (__be16 *)(skb->data - 2);
+       hdr = ntohs(*phdr);
+
+       /* Make sure the version is correct */
+       ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+       if (unlikely(ver != QCA_HDR_VERSION))
+               goto out_drop;
+
+       /* Remove QCA tag and recalculate checksum */
+       skb_pull_rcsum(skb, QCA_HDR_LEN);
+       memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
+               ETH_HLEN - QCA_HDR_LEN);
+
+       /* This protocol doesn't support cascading multiple switches so it's
+        * safe to assume the switch is first in the tree
+        */
+       ds = dst->ds[0];
+       if (!ds)
+               goto out_drop;
+
+       /* Get source port information */
+       port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+       if (!ds->ports[port].netdev)
+               goto out_drop;
+
+       /* Update skb & forward the frame accordingly */
+       skb_push(skb, ETH_HLEN);
+       skb->pkt_type = PACKET_HOST;
+       skb->dev = ds->ports[port].netdev;
+       skb->protocol = eth_type_trans(skb, skb->dev);
+
+       skb->dev->stats.rx_packets++;
+       skb->dev->stats.rx_bytes += skb->len;
+
+       netif_receive_skb(skb);
+
+       return 0;
+
+out_drop:
+       kfree_skb(skb);
+out:
+       return 0;
+}
+
+const struct dsa_device_ops qca_netdev_ops = {
+       .xmit   = qca_tag_xmit,
+       .rcv    = qca_tag_rcv,
+};
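
The receive-side masks above are plain GENMASK()/shift pairs. A small
illustrative helper (not part of the patch) decoding a host-order QCA header
the same way qca_tag_rcv() does:

static void qca_hdr_decode_example(u16 hdr)
{
	u8 ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
	u8 prio = (hdr & QCA_HDR_RECV_PRIORITY_MASK) >> QCA_HDR_RECV_PRIORITY_S;
	bool tagged = !!(hdr & QCA_HDR_RECV_FRAME_IS_TAGGED);
	int port = hdr & QCA_HDR_RECV_SOURCE_PORT_MASK;

	pr_debug("qca hdr: ver %u prio %u tagged %d port %d\n",
		 ver, prio, tagged, port);
}
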
index 55513e6..e94b47b 100644
@@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog)
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != TCP_LISTEN) {
-               /* Check special setups for testing purpose to enable TFO w/o
-                * requiring TCP_FASTOPEN sockopt.
+               /* Enable TFO w/o requiring TCP_FASTOPEN socket option.
                 * Note that only TCP sockets (SOCK_STREAM) will reach here.
-                * Also fastopenq may already been allocated because this
-                * socket was in TCP_LISTEN state previously but was
-                * shutdown() (rather than close()).
+                * Also fastopen backlog may already have been set via the option
+                * because the socket was in TCP_LISTEN state previously but
+                * was shutdown() rather than close().
                 */
-               if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+               if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+                   (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
                    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
-                       if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
-                               fastopen_queue_tune(sk, backlog);
-                       else if ((sysctl_tcp_fastopen &
-                                 TFO_SERVER_WO_SOCKOPT2) != 0)
-                               fastopen_queue_tune(sk,
-                                   ((uint)sysctl_tcp_fastopen) >> 16);
-
+                       fastopen_queue_tune(sk, backlog);
                        tcp_fastopen_init_key_once(true);
                }
+
                err = inet_csk_listen_start(sk, backlog);
                if (err)
                        goto out;
@@ -921,6 +916,8 @@ const struct proto_ops inet_stream_ops = {
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
        .splice_read       = tcp_splice_read,
+       .read_sock         = tcp_read_sock,
+       .peek_len          = tcp_peek_len,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
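
After this cleanup, a listener gets Fast Open without the socket option only
when both TFO_SERVER_ENABLE and TFO_SERVER_WO_SOCKOPT1 are set in
net.ipv4.tcp_fastopen, with the queue length taken from the listen() backlog;
the old TFO_SERVER_WO_SOCKOPT2 path (queue length in the sysctl's upper 16
bits) is no longer taken here. The explicit per-socket route is unchanged; a
minimal userspace sketch:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

int listen_with_tfo(int fd, int backlog)
{
	int qlen = 16;	/* max pending TFO requests; tune as needed */

	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
		return -1;
	return listen(fd, backlog);
}
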
index 415e117..062a67c 100644
@@ -2232,7 +2232,7 @@ static struct devinet_sysctl_table {
 };
 
 static int __devinet_sysctl_register(struct net *net, char *dev_name,
-                                       struct ipv4_devconf *p)
+                                    int ifindex, struct ipv4_devconf *p)
 {
        int i;
        struct devinet_sysctl_table *t;
@@ -2255,6 +2255,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
                goto free;
 
        p->sysctl = t;
+
+       inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
        return 0;
 
 free:
@@ -2286,7 +2288,7 @@ static int devinet_sysctl_register(struct in_device *idev)
        if (err)
                return err;
        err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
-                                       &idev->cnf);
+                                       idev->dev->ifindex, &idev->cnf);
        if (err)
                neigh_sysctl_unregister(idev->arp_parms);
        return err;
@@ -2347,11 +2349,12 @@ static __net_init int devinet_init_net(struct net *net)
        }
 
 #ifdef CONFIG_SYSCTL
-       err = __devinet_sysctl_register(net, "all", all);
+       err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
        if (err < 0)
                goto err_reg_all;
 
-       err = __devinet_sysctl_register(net, "default", dflt);
+       err = __devinet_sysctl_register(net, "default",
+                                       NETCONFA_IFINDEX_DEFAULT, dflt);
        if (err < 0)
                goto err_reg_dflt;
 
index 317c319..4e56a4c 100644
@@ -503,6 +503,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
                if (!dev)
                        return -ENODEV;
                cfg->fc_oif = dev->ifindex;
+               cfg->fc_table = l3mdev_fib_table(dev);
                if (colon) {
                        struct in_ifaddr *ifa;
                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -1021,7 +1022,7 @@ no_promotions:
                         * First of all, we scan fib_info list searching
                         * for stray nexthop entries, then ignite fib_flush.
                         */
-                       if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+                       if (fib_sync_down_addr(dev, ifa->ifa_local))
                                fib_flush(dev_net(dev));
                }
        }
index 6e9ea69..770bebe 100644
@@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp,
        };
        int err;
 
+       /* update flow if oif or iif point to device enslaved to l3mdev */
+       l3mdev_update_flow(net, flowi4_to_flowi(flp));
+
        err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
 #ifdef CONFIG_IP_ROUTE_CLASSID
        if (arg.rule)
index 539fa26..388d3e2 100644
@@ -1057,6 +1057,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
        fi->fib_priority = cfg->fc_priority;
        fi->fib_prefsrc = cfg->fc_prefsrc;
        fi->fib_type = cfg->fc_type;
+       fi->fib_tb_id = cfg->fc_table;
 
        fi->fib_nhs = nhs;
        change_nexthops(fi) {
@@ -1337,18 +1338,21 @@ nla_put_failure:
  *   referring to it.
  * - device went down -> we must shutdown all nexthops going via it.
  */
-int fib_sync_down_addr(struct net *net, __be32 local)
+int fib_sync_down_addr(struct net_device *dev, __be32 local)
 {
        int ret = 0;
        unsigned int hash = fib_laddr_hashfn(local);
        struct hlist_head *head = &fib_info_laddrhash[hash];
+       struct net *net = dev_net(dev);
+       int tb_id = l3mdev_fib_table(dev);
        struct fib_info *fi;
 
        if (!fib_info_laddrhash || local == 0)
                return 0;
 
        hlist_for_each_entry(fi, head, fib_lhash) {
-               if (!net_eq(fi->fib_net, net))
+               if (!net_eq(fi->fib_net, net) ||
+                   fi->fib_tb_id != tb_id)
                        continue;
                if (fi->fib_prefsrc == local) {
                        fi->fib_flags |= RTNH_F_DEAD;
@@ -1576,7 +1580,8 @@ static bool fib_good_nh(const struct fib_nh *nh)
 
                rcu_read_lock_bh();
 
-               n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
+               n = __ipv4_neigh_lookup_noref(nh->nh_dev,
+                                             (__force u32)nh->nh_gw);
                if (n)
                        state = n->nud_state;
 
index febca0f..241f27b 100644
@@ -249,7 +249,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv)
  * index into the parent's child array. That is, they will be used to find
  * 'n' among tp's children.
  *
- * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
+ * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
  * for the node n.
  *
  * All the bits we have seen so far are significant to the node n. The rest
@@ -258,7 +258,7 @@ static inline unsigned long get_index(t_key key, struct key_vector *kv)
  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
  * n's child array, and will of course be different for each child.
  *
- * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
+ * The rest of the bits, from 0 to (n->pos - 1) - "u" - are completely unknown
  * at this point.
  */
 
@@ -1081,7 +1081,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
        struct trie *t = (struct trie *)tb->tb_data;
        struct fib_alias *fa, *new_fa;
        struct key_vector *l, *tp;
-       unsigned int nlflags = 0;
+       u16 nlflags = NLM_F_EXCL;
        struct fib_info *fi;
        u8 plen = cfg->fc_dst_len;
        u8 slen = KEYLENGTH - plen;
@@ -1126,6 +1126,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                if (cfg->fc_nlflags & NLM_F_EXCL)
                        goto out;
 
+               nlflags &= ~NLM_F_EXCL;
+
                /* We have 2 goals:
                 * 1. Find exact match for type, scope, fib_info to avoid
                 * duplicate routes
@@ -1151,6 +1153,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                        struct fib_info *fi_drop;
                        u8 state;
 
+                       nlflags |= NLM_F_REPLACE;
                        fa = fa_first;
                        if (fa_match) {
                                if (fa == fa_match)
@@ -1191,7 +1194,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                        if (state & FA_S_ACCESSED)
                                rt_cache_flush(cfg->fc_nlinfo.nl_net);
                        rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
-                               tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
+                               tb->tb_id, &cfg->fc_nlinfo, nlflags);
 
                        goto succeeded;
                }
@@ -1203,7 +1206,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                        goto out;
 
                if (cfg->fc_nlflags & NLM_F_APPEND)
-                       nlflags = NLM_F_APPEND;
+                       nlflags |= NLM_F_APPEND;
                else
                        fa = fa_first;
        }
@@ -1211,6 +1214,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
        if (!(cfg->fc_nlflags & NLM_F_CREATE))
                goto out;
 
+       nlflags |= NLM_F_CREATE;
        err = -ENOBUFS;
        new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
        if (!new_fa)
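
The net effect of the nlflags rework above is that the RTM_NEWROUTE
notification now describes what actually happened to the table instead of
echoing a fixed flag. A condensed sketch of the bookkeeping spread across the
hunks:

/* nlflags lifecycle in fib_table_insert(), summarized:
 *
 *	u16 nlflags = NLM_F_EXCL;	assume a brand-new, exclusive entry
 *	if a matching alias exists:
 *		nlflags &= ~NLM_F_EXCL;	not exclusive after all
 *	replace path:	nlflags |= NLM_F_REPLACE;
 *	append path:	nlflags |= NLM_F_APPEND;
 *	new alias:	nlflags |= NLM_F_CREATE;
 *	rtmsg_fib(RTM_NEWROUTE, ..., nlflags);
 */
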
index 321d57f..cf50f7e 100644
@@ -631,7 +631,7 @@ static struct genl_family fou_nl_family = {
        .netnsok        = true,
 };
 
-static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
        [FOU_ATTR_PORT] = { .type = NLA_U16, },
        [FOU_ATTR_AF] = { .type = NLA_U8, },
        [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
index 38c2c47..e4d16fc 100644
@@ -45,6 +45,7 @@ struct inet_diag_entry {
        u16 family;
        u16 userlocks;
        u32 ifindex;
+       u32 mark;
 };
 
 static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -98,6 +99,7 @@ static size_t inet_sk_attr_size(void)
                + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
                + nla_total_size(1) /* INET_DIAG_TOS */
                + nla_total_size(1) /* INET_DIAG_TCLASS */
+               + nla_total_size(4) /* INET_DIAG_MARK */
                + nla_total_size(sizeof(struct inet_diag_meminfo))
                + nla_total_size(sizeof(struct inet_diag_msg))
                + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -108,7 +110,8 @@ static size_t inet_sk_attr_size(void)
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
                             struct inet_diag_msg *r, int ext,
-                            struct user_namespace *user_ns)
+                            struct user_namespace *user_ns,
+                            bool net_admin)
 {
        const struct inet_sock *inet = inet_sk(sk);
 
@@ -135,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
        }
 #endif
 
+       if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+               goto errout;
+
        r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
        r->idiag_inode = sock_i_ino(sk);
 
@@ -148,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
                      struct user_namespace *user_ns,
                      u32 portid, u32 seq, u16 nlmsg_flags,
-                     const struct nlmsghdr *unlh)
+                     const struct nlmsghdr *unlh,
+                     bool net_admin)
 {
        const struct tcp_congestion_ops *ca_ops;
        const struct inet_diag_handler *handler;
@@ -174,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
        r->idiag_timer = 0;
        r->idiag_retrans = 0;
 
-       if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+       if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
                goto errout;
 
        if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -273,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk,
                              const struct inet_diag_req_v2 *req,
                              struct user_namespace *user_ns,
                              u32 portid, u32 seq, u16 nlmsg_flags,
-                             const struct nlmsghdr *unlh)
+                             const struct nlmsghdr *unlh,
+                             bool net_admin)
 {
-       return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
-                                user_ns, portid, seq, nlmsg_flags, unlh);
+       return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
+                                portid, seq, nlmsg_flags, unlh, net_admin);
 }
 
 static int inet_twsk_diag_fill(struct sock *sk,
@@ -318,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk,
 
 static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
                              u32 portid, u32 seq, u16 nlmsg_flags,
-                             const struct nlmsghdr *unlh)
+                             const struct nlmsghdr *unlh, bool net_admin)
 {
+       struct request_sock *reqsk = inet_reqsk(sk);
        struct inet_diag_msg *r;
        struct nlmsghdr *nlh;
        long tmo;
@@ -333,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
        inet_diag_msg_common_fill(r, sk);
        r->idiag_state = TCP_SYN_RECV;
        r->idiag_timer = 1;
-       r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+       r->idiag_retrans = reqsk->num_retrans;
 
        BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
                     offsetof(struct sock, sk_cookie));
@@ -345,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
        r->idiag_uid    = 0;
        r->idiag_inode  = 0;
 
+       if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+                                    inet_rsk(reqsk)->ir_mark))
+               return -EMSGSIZE;
+
        nlmsg_end(skb, nlh);
        return 0;
 }
@@ -353,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
                        const struct inet_diag_req_v2 *r,
                        struct user_namespace *user_ns,
                        u32 portid, u32 seq, u16 nlmsg_flags,
-                       const struct nlmsghdr *unlh)
+                       const struct nlmsghdr *unlh, bool net_admin)
 {
        if (sk->sk_state == TCP_TIME_WAIT)
                return inet_twsk_diag_fill(sk, skb, portid, seq,
@@ -361,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 
        if (sk->sk_state == TCP_NEW_SYN_RECV)
                return inet_req_diag_fill(sk, skb, portid, seq,
-                                         nlmsg_flags, unlh);
+                                         nlmsg_flags, unlh, net_admin);
 
        return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
-                                 nlmsg_flags, unlh);
+                                 nlmsg_flags, unlh, net_admin);
 }
 
 struct sock *inet_diag_find_one_icsk(struct net *net,
@@ -434,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
        err = sk_diag_fill(sk, rep, req,
                           sk_user_ns(NETLINK_CB(in_skb).sk),
                           NETLINK_CB(in_skb).portid,
-                          nlh->nlmsg_seq, 0, nlh);
+                          nlh->nlmsg_seq, 0, nlh,
+                          netlink_net_capable(in_skb, CAP_NET_ADMIN));
        if (err < 0) {
                WARN_ON(err == -EMSGSIZE);
                nlmsg_free(rep);
@@ -580,6 +594,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
                                yes = 0;
                        break;
                }
+               case INET_DIAG_BC_MARK_COND: {
+                       struct inet_diag_markcond *cond;
+
+                       cond = (struct inet_diag_markcond *)(op + 1);
+                       if ((entry->mark & cond->mask) != cond->mark)
+                               yes = 0;
+                       break;
+               }
                }
 
                if (yes) {
@@ -624,6 +646,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
        entry.dport = ntohs(inet->inet_dport);
        entry.ifindex = sk->sk_bound_dev_if;
        entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
+       if (sk_fullsock(sk))
+               entry.mark = sk->sk_mark;
+       else if (sk->sk_state == TCP_NEW_SYN_RECV)
+               entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+       else
+               entry.mark = 0;
 
        return inet_diag_bc_run(bc, &entry);
 }
@@ -706,10 +734,25 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op,
        return true;
 }
 
-static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
+static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
+                          int *min_len)
+{
+       *min_len += sizeof(struct inet_diag_markcond);
+       return len >= *min_len;
+}
+
+static int inet_diag_bc_audit(const struct nlattr *attr,
+                             const struct sk_buff *skb)
 {
-       const void *bc = bytecode;
-       int  len = bytecode_len;
+       bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+       const void *bytecode, *bc;
+       int bytecode_len, len;
+
+       if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+               return -EINVAL;
+
+       bytecode = bc = nla_data(attr);
+       len = bytecode_len = nla_len(attr);
 
        while (len > 0) {
                int min_len = sizeof(struct inet_diag_bc_op);
@@ -732,6 +775,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
                        if (!valid_port_comparison(bc, len, &min_len))
                                return -EINVAL;
                        break;
+               case INET_DIAG_BC_MARK_COND:
+                       if (!net_admin)
+                               return -EPERM;
+                       if (!valid_markcond(bc, len, &min_len))
+                               return -EINVAL;
+                       break;
                case INET_DIAG_BC_AUTO:
                case INET_DIAG_BC_JMP:
                case INET_DIAG_BC_NOP:
@@ -760,7 +809,8 @@ static int inet_csk_diag_dump(struct sock *sk,
                              struct sk_buff *skb,
                              struct netlink_callback *cb,
                              const struct inet_diag_req_v2 *r,
-                             const struct nlattr *bc)
+                             const struct nlattr *bc,
+                             bool net_admin)
 {
        if (!inet_diag_bc_sk(bc, sk))
                return 0;
@@ -768,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk,
        return inet_csk_diag_fill(sk, skb, r,
                                  sk_user_ns(NETLINK_CB(cb->skb).sk),
                                  NETLINK_CB(cb->skb).portid,
-                                 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+                                 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
+                                 net_admin);
 }
 
 static void twsk_build_assert(void)
@@ -804,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
        struct net *net = sock_net(skb->sk);
        int i, num, s_i, s_num;
        u32 idiag_states = r->idiag_states;
+       bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 
        if (idiag_states & TCPF_SYN_RECV)
                idiag_states |= TCPF_NEW_SYN_RECV;
@@ -844,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
                                    cb->args[3] > 0)
                                        goto next_listen;
 
-                               if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
+                               if (inet_csk_diag_dump(sk, skb, cb, r,
+                                                      bc, net_admin) < 0) {
                                        spin_unlock_bh(&ilb->lock);
                                        goto done;
                                }
@@ -912,7 +965,7 @@ skip_listen_ht:
                                           sk_user_ns(NETLINK_CB(cb->skb).sk),
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                          cb->nlh);
+                                          cb->nlh, net_admin);
                        if (res < 0) {
                                spin_unlock_bh(lock);
                                goto done;
@@ -1020,13 +1073,13 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
                if (nlmsg_attrlen(nlh, hdrlen)) {
                        struct nlattr *attr;
+                       int err;
 
                        attr = nlmsg_find_attr(nlh, hdrlen,
                                               INET_DIAG_REQ_BYTECODE);
-                       if (!attr ||
-                           nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
-                           inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
-                               return -EINVAL;
+                       err = inet_diag_bc_audit(attr, skb);
+                       if (err)
+                               return err;
                }
                {
                        struct netlink_dump_control c = {
@@ -1051,13 +1104,13 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
            h->nlmsg_flags & NLM_F_DUMP) {
                if (nlmsg_attrlen(h, hdrlen)) {
                        struct nlattr *attr;
+                       int err;
 
                        attr = nlmsg_find_attr(h, hdrlen,
                                               INET_DIAG_REQ_BYTECODE);
-                       if (!attr ||
-                           nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
-                           inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
-                               return -EINVAL;
+                       err = inet_diag_bc_audit(attr, skb);
+                       if (err)
+                               return err;
                }
                {
                        struct netlink_dump_control c = {
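
Userspace requests mark filtering as ordinary inet_diag bytecode: an
inet_diag_bc_op with the new INET_DIAG_BC_MARK_COND code immediately followed
by an inet_diag_markcond payload, all carried in the INET_DIAG_REQ_BYTECODE
attribute, and the requester must have CAP_NET_ADMIN as enforced by
inet_diag_bc_audit() above. A hedged sketch of building a single such op
(netlink framing omitted; the offsets follow the usual convention that "yes"
falls through to the next op and "no" jumps past the end to reject):

#include <linux/inet_diag.h>
#include <string.h>

static size_t build_mark_cond(char *buf, __u32 mark, __u32 mask)
{
	struct inet_diag_bc_op op = {
		.code = INET_DIAG_BC_MARK_COND,
		.yes  = sizeof(struct inet_diag_bc_op) +
			sizeof(struct inet_diag_markcond),
	};
	struct inet_diag_markcond cond = { .mark = mark, .mask = mask };

	op.no = op.yes + 4;	/* reject: jump past the end of the program */
	memcpy(buf, &op, sizeof(op));
	memcpy(buf + sizeof(op), &cond, sizeof(cond));
	return sizeof(op) + sizeof(cond);
}
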
index 113cc43..576f705 100644
@@ -246,25 +246,6 @@ static void gre_err(struct sk_buff *skb, u32 info)
        ipgre_err(skb, info, &tpi);
 }
 
-static __be64 key_to_tunnel_id(__be32 key)
-{
-#ifdef __BIG_ENDIAN
-       return (__force __be64)((__force u32)key);
-#else
-       return (__force __be64)((__force u64)key << 32);
-#endif
-}
-
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 tunnel_id_to_key(__be64 x)
-{
-#ifdef __BIG_ENDIAN
-       return (__force __be32)x;
-#else
-       return (__force __be32)((__force u64)x >> 32);
-#endif
-}
-
 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
                       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 {
@@ -290,7 +271,7 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
                        __be64 tun_id;
 
                        flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
-                       tun_id = key_to_tunnel_id(tpi->key);
+                       tun_id = key32_to_tunnel_id(tpi->key);
                        tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
                        if (!tun_dst)
                                return PACKET_REJECT;
@@ -446,7 +427,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 
        flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
        gre_build_header(skb, tunnel_hlen, flags, proto,
-                        tunnel_id_to_key(tun_info->key.tun_id), 0);
+                        tunnel_id_to_key32(tun_info->key.tun_id), 0);
 
        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
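
The two static helpers removed here are not lost: the call sites switch to
key32_to_tunnel_id() and tunnel_id_to_key32(), so the same endian-aware
conversions presumably move to a shared tunnel header (likely
include/net/ip_tunnels.h) where other drivers can reuse them. Their bodies
should mirror the deleted copies:

static inline __be64 key32_to_tunnel_id(__be32 key)
{
#ifdef __BIG_ENDIAN
	return (__force __be64)((__force u32)key);
#else
	return (__force __be64)((__force u64)key << 32);
#endif
}

static inline __be32 tunnel_id_to_key32(__be64 tun_id)
{
#ifdef __BIG_ENDIAN
	return (__force __be32)tun_id;
#else
	return (__force __be32)((__force u64)tun_id >> 32);
#endif
}
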
 
index dde37fb..05d1058 100644
@@ -73,6 +73,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/lwtunnel.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_bridge.h>
@@ -98,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
 
        iph->tot_len = htons(skb->len);
        ip_send_check(iph);
+
+       /* if egress device is enslaved to an L3 master device pass the
+        * skb to its handler for processing
+        */
+       skb = l3mdev_ip_out(sk, skb);
+       if (unlikely(!skb))
+               return 0;
+
        return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
                       net, sk, skb, NULL, skb_dst(skb)->dev,
                       dst_output);
@@ -197,6 +206,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
                skb = skb2;
        }
 
+       if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+               int res = lwtunnel_xmit(skb);
+
+               if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+                       return res;
+       }
+
        rcu_read_lock_bh();
        nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
        neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
@@ -482,7 +498,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
        to->tc_index = from->tc_index;
 #endif
        nf_copy(to, from);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+#if IS_ENABLED(CONFIG_IP_VS)
        to->ipvs_property = from->ipvs_property;
 #endif
        skb_copy_secmark(to, from);
@@ -1566,8 +1582,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
        }
 
        oif = arg->bound_dev_if;
-       if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
-               oif = skb->skb_iif;
+       oif = oif ? : skb->skb_iif;
 
        flowi4_init_output(&fl4, oif,
                           IP4_REPLY_MARK(net, skb->mark),
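
Two output-path hooks appear here: l3mdev_ip_out() lets an L3 master device
(e.g. a VRF) intercept locally generated traffic, and lwtunnel_xmit() lets a
lightweight-tunnel state transmit the packet itself before neighbour
resolution. A hedged sketch of what an encap type's xmit hook looks like under
that contract; the device handling is an assumption, not from this patch:

static struct net_device *example_tunnel_dev;	/* assumed, set at init */

static int example_lwt_xmit(struct sk_buff *skb)
{
	/* Hypothetical encap: push our header, retarget the skb, and
	 * transmit it ourselves.
	 */
	skb->dev = example_tunnel_dev;
	dev_queue_xmit(skb);

	return LWTUNNEL_XMIT_DONE;	/* tell the caller the skb is consumed */
}
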
index 71a52f4..af49197 100644
@@ -284,9 +284,12 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
                        ipc->ttl = val;
                        break;
                case IP_TOS:
-                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+                       if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+                               val = *(int *)CMSG_DATA(cmsg);
+                       else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+                               val = *(u8 *)CMSG_DATA(cmsg);
+                       else
                                return -EINVAL;
-                       val = *(int *)CMSG_DATA(cmsg);
                        if (val < 0 || val > 255)
                                return -EINVAL;
                        ipc->tos = val;
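
Previously a one-byte IP_TOS ancillary object was rejected with -EINVAL even
though the cmsg delivered by IP_RECVTOS is itself one byte; both widths are
now accepted. A minimal userspace sketch sending a datagram with a per-packet
TOS using the one-byte form:

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static ssize_t send_with_tos(int fd, const void *data, size_t len,
			     const struct sockaddr_in *dst)
{
	unsigned char tos = 0x10;		/* IPTOS_LOWDELAY */
	char cbuf[CMSG_SPACE(sizeof(tos))];
	struct iovec iov = { .iov_base = (void *)data, .iov_len = len };
	struct msghdr msg = {
		.msg_name	= (void *)dst,
		.msg_namelen	= sizeof(*dst),
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= cbuf,
		.msg_controllen	= sizeof(cbuf),
	};
	struct cmsghdr *cmsg;

	memset(cbuf, 0, sizeof(cbuf));
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = IPPROTO_IP;
	cmsg->cmsg_type = IP_TOS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(tos));
	memcpy(CMSG_DATA(cmsg), &tos, sizeof(tos));

	return sendmsg(fd, &msg, 0);
}
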
index 9d847c3..777bc18 100644
@@ -69,13 +69,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 
        skb_scrub_packet(skb, xnet);
 
-       skb_clear_hash(skb);
+       skb_clear_hash_if_not_l4(skb);
        skb_dst_set(skb, &rt->dst);
        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-       if (skb_iif && proto == IPPROTO_UDP) {
-               /* Arrived from an ingress interface and got udp encapuslated.
-                * The encapsulated network segment length may exceed dst mtu.
+       if (skb_iif && !(df & htons(IP_DF))) {
+               /* Arrived from an ingress interface, got encapsulated, with
+                * fragmentation of encapsulating frames allowed.
+                * If skb is gso, the resulting encapsulated network segments
+                * may exceed dst mtu.
                 * Allow IP Fragmentation of segments.
                 */
                IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
index ba9cbea..071a785 100644
@@ -306,7 +306,7 @@ static void __init ic_close_devs(void)
        while ((d = next)) {
                next = d->next;
                dev = d->dev;
-               if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) {
+               if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
                        pr_debug("IP-Config: Downing %s\n", dev->name);
                        dev_change_flags(dev, d->flags);
                }
index c187c60..d613309 100644
@@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4
 
          To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_CONNTRACK_PROC_COMPAT
-       bool "proc/sysctl compatibility with old connection tracking"
-       depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
-       default y
-       help
-         This option enables /proc and sysctl compatibility with the old
-         layer 3 dependent connection tracking. This is needed to keep
-         old programs that have not been adapted to the new names working.
-
-         If unsure, say Y.
-
 if NF_TABLES
 
 config NF_TABLES_IPV4
index 87b073d..853328f 100644
@@ -4,11 +4,6 @@
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv4-y    :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
-ifeq ($(CONFIG_PROC_FS),y)
-nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
-endif
-endif
 
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
index ae1a71a..870aebd 100644
@@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
        },
 };
 
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table ip_ct_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_max",
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_count",
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_buckets",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0444,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_checksum",
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_log_invalid",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &log_invalid_proto_min,
-               .extra2         = &log_invalid_proto_max,
-       },
-       { }
-};
-#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
-
 /* Fast function for those who don't want to parse /proc (and I don't
    blame them). */
 /* Reversing the socket's dst/src point of view gives us the reply
@@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = {
 
 static int ipv4_init_net(struct net *net)
 {
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       struct nf_ip_net *in = &net->ct.nf_ct_proto;
-       in->ctl_table = kmemdup(ip_ct_sysctl_table,
-                               sizeof(ip_ct_sysctl_table),
-                               GFP_KERNEL);
-       if (!in->ctl_table)
-               return -ENOMEM;
-
-       in->ctl_table[0].data = &nf_conntrack_max;
-       in->ctl_table[1].data = &net->ct.count;
-       in->ctl_table[2].data = &nf_conntrack_htable_size;
-       in->ctl_table[3].data = &net->ct.sysctl_checksum;
-       in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
-#endif
        return 0;
 }
 
@@ -379,9 +324,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
        .nlattr_tuple_size = ipv4_nlattr_tuple_size,
        .nlattr_to_tuple = ipv4_nlattr_to_tuple,
        .nla_policy      = ipv4_nla_policy,
-#endif
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       .ctl_table_path  = "net/ipv4/netfilter",
 #endif
        .init_net        = ipv4_init_net,
        .me              = THIS_MODULE,
@@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
                goto cleanup_icmpv4;
        }
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       ret = nf_conntrack_ipv4_compat_init();
-       if (ret < 0)
-               goto cleanup_proto;
-#endif
        return ret;
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- cleanup_proto:
-       nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-#endif
  cleanup_icmpv4:
        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
  cleanup_udp4:
@@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 {
        synchronize_net();
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       nf_conntrack_ipv4_compat_fini();
-#endif
        nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
        nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
deleted file mode 100644 (file)
index 6392371..0000000
+++ /dev/null
@@ -1,492 +0,0 @@
-/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_conntrack_acct.h>
-#include <linux/rculist_nulls.h>
-#include <linux/export.h>
-
-struct ct_iter_state {
-       struct seq_net_private p;
-       struct hlist_nulls_head *hash;
-       unsigned int htable_size;
-       unsigned int bucket;
-};
-
-static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
-{
-       struct ct_iter_state *st = seq->private;
-       struct hlist_nulls_node *n;
-
-       for (st->bucket = 0;
-            st->bucket < st->htable_size;
-            st->bucket++) {
-               n = rcu_dereference(
-                       hlist_nulls_first_rcu(&st->hash[st->bucket]));
-               if (!is_a_nulls(n))
-                       return n;
-       }
-       return NULL;
-}
-
-static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
-                                     struct hlist_nulls_node *head)
-{
-       struct ct_iter_state *st = seq->private;
-
-       head = rcu_dereference(hlist_nulls_next_rcu(head));
-       while (is_a_nulls(head)) {
-               if (likely(get_nulls_value(head) == st->bucket)) {
-                       if (++st->bucket >= st->htable_size)
-                               return NULL;
-               }
-               head = rcu_dereference(
-                       hlist_nulls_first_rcu(&st->hash[st->bucket]));
-       }
-       return head;
-}
-
-static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
-       struct hlist_nulls_node *head = ct_get_first(seq);
-
-       if (head)
-               while (pos && (head = ct_get_next(seq, head)))
-                       pos--;
-       return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(RCU)
-{
-       struct ct_iter_state *st = seq->private;
-
-       rcu_read_lock();
-
-       nf_conntrack_get_ht(&st->hash, &st->htable_size);
-       return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-       (*pos)++;
-       return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-       __releases(RCU)
-{
-       rcu_read_unlock();
-}
-
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
-       int ret;
-       u32 len;
-       char *secctx;
-
-       ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
-       if (ret)
-               return;
-
-       seq_printf(s, "secctx=%s ", secctx);
-
-       security_release_secctx(secctx, len);
-}
-#else
-static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
-}
-#endif
-
-static bool ct_seq_should_skip(const struct nf_conn *ct,
-                              const struct net *net,
-                              const struct nf_conntrack_tuple_hash *hash)
-{
-       /* we only want to print DIR_ORIGINAL */
-       if (NF_CT_DIRECTION(hash))
-               return true;
-
-       if (nf_ct_l3num(ct) != AF_INET)
-               return true;
-
-       if (!net_eq(nf_ct_net(ct), net))
-               return true;
-
-       return false;
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
-       struct nf_conntrack_tuple_hash *hash = v;
-       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
-       const struct nf_conntrack_l3proto *l3proto;
-       const struct nf_conntrack_l4proto *l4proto;
-       int ret = 0;
-
-       NF_CT_ASSERT(ct);
-       if (ct_seq_should_skip(ct, seq_file_net(s), hash))
-               return 0;
-
-       if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
-               return 0;
-
-       /* check if we raced w. object reuse */
-       if (!nf_ct_is_confirmed(ct) ||
-           ct_seq_should_skip(ct, seq_file_net(s), hash))
-               goto release;
-
-       l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-       NF_CT_ASSERT(l3proto);
-       l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-       NF_CT_ASSERT(l4proto);
-
-       ret = -ENOSPC;
-       seq_printf(s, "%-8s %u %ld ",
-                  l4proto->name, nf_ct_protonum(ct),
-                  timer_pending(&ct->timeout)
-                  ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
-
-       if (l4proto->print_conntrack)
-               l4proto->print_conntrack(s, ct);
-
-       if (seq_has_overflowed(s))
-               goto release;
-
-       print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-                   l3proto, l4proto);
-
-       if (seq_has_overflowed(s))
-               goto release;
-
-       if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
-               goto release;
-
-       if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
-               seq_printf(s, "[UNREPLIED] ");
-
-       print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-                   l3proto, l4proto);
-
-       if (seq_has_overflowed(s))
-               goto release;
-
-       if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
-               goto release;
-
-       if (test_bit(IPS_ASSURED_BIT, &ct->status))
-               seq_printf(s, "[ASSURED] ");
-
-#ifdef CONFIG_NF_CONNTRACK_MARK
-       seq_printf(s, "mark=%u ", ct->mark);
-#endif
-
-       ct_show_secctx(s, ct);
-
-       seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
-
-       if (seq_has_overflowed(s))
-               goto release;
-
-       ret = 0;
-release:
-       nf_ct_put(ct);
-       return ret;
-}
-
-static const struct seq_operations ct_seq_ops = {
-       .start = ct_seq_start,
-       .next  = ct_seq_next,
-       .stop  = ct_seq_stop,
-       .show  = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
-       return seq_open_net(inode, file, &ct_seq_ops,
-                           sizeof(struct ct_iter_state));
-}
-
-static const struct file_operations ct_file_ops = {
-       .owner   = THIS_MODULE,
-       .open    = ct_open,
-       .read    = seq_read,
-       .llseek  = seq_lseek,
-       .release = seq_release_net,
-};
-
-/* expects */
-struct ct_expect_iter_state {
-       struct seq_net_private p;
-       unsigned int bucket;
-};
-
-static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
-{
-       struct ct_expect_iter_state *st = seq->private;
-       struct hlist_node *n;
-
-       for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(
-                       hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
-               if (n)
-                       return n;
-       }
-       return NULL;
-}
-
-static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
-                                            struct hlist_node *head)
-{
-       struct ct_expect_iter_state *st = seq->private;
-
-       head = rcu_dereference(hlist_next_rcu(head));
-       while (head == NULL) {
-               if (++st->bucket >= nf_ct_expect_hsize)
-                       return NULL;
-               head = rcu_dereference(
-                       hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
-       }
-       return head;
-}
-
-static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
-{
-       struct hlist_node *head = ct_expect_get_first(seq);
-
-       if (head)
-               while (pos && (head = ct_expect_get_next(seq, head)))
-                       pos--;
-       return pos ? NULL : head;
-}
-
-static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
-       __acquires(RCU)
-{
-       rcu_read_lock();
-       return ct_expect_get_idx(seq, *pos);
-}
-
-static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       (*pos)++;
-       return ct_expect_get_next(seq, v);
-}
-
-static void exp_seq_stop(struct seq_file *seq, void *v)
-       __releases(RCU)
-{
-       rcu_read_unlock();
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
-       struct nf_conntrack_expect *exp;
-       const struct hlist_node *n = v;
-
-       exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
-
-       if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
-               return 0;
-
-       if (exp->tuple.src.l3num != AF_INET)
-               return 0;
-
-       if (exp->timeout.function)
-               seq_printf(s, "%ld ", timer_pending(&exp->timeout)
-                          ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
-       else
-               seq_printf(s, "- ");
-
-       seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
-
-       print_tuple(s, &exp->tuple,
-                   __nf_ct_l3proto_find(exp->tuple.src.l3num),
-                   __nf_ct_l4proto_find(exp->tuple.src.l3num,
-                                        exp->tuple.dst.protonum));
-       seq_putc(s, '\n');
-
-       return 0;
-}
-
-static const struct seq_operations exp_seq_ops = {
-       .start = exp_seq_start,
-       .next = exp_seq_next,
-       .stop = exp_seq_stop,
-       .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
-       return seq_open_net(inode, file, &exp_seq_ops,
-                           sizeof(struct ct_expect_iter_state));
-}
-
-static const struct file_operations ip_exp_file_ops = {
-       .owner   = THIS_MODULE,
-       .open    = exp_open,
-       .read    = seq_read,
-       .llseek  = seq_lseek,
-       .release = seq_release_net,
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
-       struct net *net = seq_file_net(seq);
-       int cpu;
-
-       if (*pos == 0)
-               return SEQ_START_TOKEN;
-
-       for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
-               if (!cpu_possible(cpu))
-                       continue;
-               *pos = cpu+1;
-               return per_cpu_ptr(net->ct.stat, cpu);
-       }
-
-       return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-       struct net *net = seq_file_net(seq);
-       int cpu;
-
-       for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
-               if (!cpu_possible(cpu))
-                       continue;
-               *pos = cpu+1;
-               return per_cpu_ptr(net->ct.stat, cpu);
-       }
-
-       return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
-       struct net *net = seq_file_net(seq);
-       unsigned int nr_conntracks = atomic_read(&net->ct.count);
-       const struct ip_conntrack_stat *st = v;
-
-       if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
-               return 0;
-       }
-
-       seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-                       "%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
-                  nr_conntracks,
-                  st->searched,
-                  st->found,
-                  st->new,
-                  st->invalid,
-                  st->ignore,
-                  st->delete,
-                  st->delete_list,
-                  st->insert,
-                  st->insert_failed,
-                  st->drop,
-                  st->early_drop,
-                  st->error,
-
-                  st->expect_new,
-                  st->expect_create,
-                  st->expect_delete,
-                  st->search_restart
-               );
-       return 0;
-}
-
-static const struct seq_operations ct_cpu_seq_ops = {
-       .start  = ct_cpu_seq_start,
-       .next   = ct_cpu_seq_next,
-       .stop   = ct_cpu_seq_stop,
-       .show   = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
-       return seq_open_net(inode, file, &ct_cpu_seq_ops,
-                           sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
-       .owner   = THIS_MODULE,
-       .open    = ct_cpu_seq_open,
-       .read    = seq_read,
-       .llseek  = seq_lseek,
-       .release = seq_release_net,
-};
-
-static int __net_init ip_conntrack_net_init(struct net *net)
-{
-       struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-
-       proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
-       if (!proc)
-               goto err1;
-
-       proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
-                              &ip_exp_file_ops);
-       if (!proc_exp)
-               goto err2;
-
-       proc_stat = proc_create("ip_conntrack", S_IRUGO,
-                               net->proc_net_stat, &ct_cpu_seq_fops);
-       if (!proc_stat)
-               goto err3;
-       return 0;
-
-err3:
-       remove_proc_entry("ip_conntrack_expect", net->proc_net);
-err2:
-       remove_proc_entry("ip_conntrack", net->proc_net);
-err1:
-       return -ENOMEM;
-}
-
-static void __net_exit ip_conntrack_net_exit(struct net *net)
-{
-       remove_proc_entry("ip_conntrack", net->proc_net_stat);
-       remove_proc_entry("ip_conntrack_expect", net->proc_net);
-       remove_proc_entry("ip_conntrack", net->proc_net);
-}
-
-static struct pernet_operations ip_conntrack_net_ops = {
-       .init = ip_conntrack_net_init,
-       .exit = ip_conntrack_net_exit,
-};
-
-int __init nf_conntrack_ipv4_compat_init(void)
-{
-       return register_pernet_subsys(&ip_conntrack_net_ops);
-}
-
-void __exit nf_conntrack_ipv4_compat_fini(void)
-{
-       unregister_pernet_subsys(&ip_conntrack_net_ops);
-}
index c567e1b..4b5904b 100644 (file)
@@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = {
        },
        { }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table icmp_compat_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_icmp_timeout",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-                                           struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
-                                      sizeof(icmp_compat_sysctl_table),
-                                      GFP_KERNEL);
-       if (!pn->ctl_compat_table)
-               return -ENOMEM;
-
-       pn->ctl_compat_table[0].data = &in->timeout;
-#endif
-#endif
-       return 0;
-}
-
 static int icmp_init_net(struct net *net, u_int16_t proto)
 {
-       int ret;
        struct nf_icmp_net *in = icmp_pernet(net);
        struct nf_proto_net *pn = &in->pn;
 
        in->timeout = nf_ct_icmp_timeout;
 
-       ret = icmp_kmemdup_compat_sysctl_table(pn, in);
-       if (ret < 0)
-               return ret;
-
-       ret = icmp_kmemdup_sysctl_table(pn, in);
-       if (ret < 0)
-               nf_ct_kfree_compat_sysctl_table(pn);
-
-       return ret;
+       return icmp_kmemdup_sysctl_table(pn, in);
 }
 
 static struct nf_proto_net *icmp_get_net_proto(struct net *net)
index ceb1873..cf986e1 100644 (file)
@@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
        nf_conntrack_get(skb->nfct);
 #endif
        /*
-        * If we are in PREROUTING/INPUT, the checksum must be recalculated
-        * since the length could have changed as a result of defragmentation.
-        *
-        * We also decrease the TTL to mitigate potential loops between two
-        * hosts.
+        * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
+        * loops between two hosts.
         *
         * Set %IP_DF so that the original source is notified of a potentially
         * decreased MTU on the clone route. IPv6 does this too.
+        *
+        * The IP header checksum will be recalculated at ip_local_out().
         */
        iph = ip_hdr(skb);
        iph->frag_off |= htons(IP_DF);
        if (hooknum == NF_INET_PRE_ROUTING ||
            hooknum == NF_INET_LOCAL_IN)
                --iph->ttl;
-       ip_send_check(iph);
 
        if (nf_dup_ipv4_route(net, skb, gw, oif)) {
                __this_cpu_write(nf_skb_duplicated, true);
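The removed ip_send_check() is safe to drop because the duplicated skb leaves
through the local-output path, which recomputes the IPv4 header checksum
anyway. A sketch of the relevant step in that path (from memory, not a
verbatim quote of the upstream function body):

	int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
	{
		struct iphdr *iph = ip_hdr(skb);

		iph->tot_len = htons(skb->len);
		ip_send_check(iph);	/* header checksum recomputed here */
		/* ... NF_INET_LOCAL_OUT hook, then dst_output() ... */
	}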
index e7ad950..8945c26 100644 (file)
@@ -62,7 +62,7 @@ static void dump_arp_packet(struct nf_log_buf *m,
        /* If it's for Ethernet and the lengths are OK, then log the ARP
         * payload.
         */
-       if (ah->ar_hrd != htons(1) ||
+       if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
            ah->ar_hln != ETH_ALEN ||
            ah->ar_pln != sizeof(__be32))
                return;
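The magic constant 1 is the ARP hardware type for Ethernet; spelling it as
ARPHRD_ETHER makes the intent explicit without changing behavior. The
definition, from include/uapi/linux/if_arp.h:

	#define ARPHRD_ETHER	1	/* Ethernet 10Mbps */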
@@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = {
 
 static int __net_init nf_log_arp_net_init(struct net *net)
 {
-       nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
-       return 0;
+       return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
 }
 
 static void __net_exit nf_log_arp_net_exit(struct net *net)
index 076aadd..20f2255 100644 (file)
@@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = {
 
 static int __net_init nf_log_ipv4_net_init(struct net *net)
 {
-       nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
-       return 0;
+       return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
 }
 
 static void __net_exit nf_log_ipv4_net_exit(struct net *net)
index c24f41c..2c2553b 100644 (file)
@@ -46,6 +46,7 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = {
        .eval           = nft_reject_ipv4_eval,
        .init           = nft_reject_init,
        .dump           = nft_reject_dump,
+       .validate       = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
index 9f665b6..1ed015e 100644 (file)
@@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = {
        SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
        SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
        SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+       SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE),
        SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
        SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
        SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
index 438f50c..90a85c9 100644 (file)
@@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                            (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
                           daddr, saddr, 0, 0);
 
-       if (!saddr && ipc.oif) {
-               err = l3mdev_get_saddr(net, ipc.oif, &fl4);
-               if (err < 0)
-                       goto done;
-       }
-
        if (!inet->hdrincl) {
                rfv.msg = msg;
                rfv.hlen = 0;
index a1f2830..b52496f 100644 (file)
@@ -1246,7 +1246,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
                        mtu = 576;
        }
 
-       return min_t(unsigned int, mtu, IP_MAX_MTU);
+       mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+
+       return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
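The subtraction accounts for light-weight tunnel encapsulation attached to the
route. Assumed semantics of lwtunnel_headroom() (the name is real, the body
below is a paraphrased sketch, not the exact helper): return the encap
headroom recorded in the dst's lwtunnel state, or 0 when no lwtunnel is
attached, so plain routes keep the old MTU.

	/* sketch of assumed lwtunnel_headroom() behaviour */
	static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lws,
						     unsigned int mtu)
	{
		if (lws && lws->headroom < mtu)
			return lws->headroom;	/* reserve room for encap */
		return 0;
	}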
@@ -1829,7 +1831,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         *      Now we are ready to route packet.
         */
        fl4.flowi4_oif = 0;
-       fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+       fl4.flowi4_iif = dev->ifindex;
        fl4.flowi4_mark = skb->mark;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -2016,7 +2018,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
                return ERR_PTR(-EINVAL);
 
        if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
-               if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+               if (ipv4_is_loopback(fl4->saddr) &&
+                   !(dev_out->flags & IFF_LOOPBACK) &&
+                   !netif_is_l3_master(dev_out))
                        return ERR_PTR(-EINVAL);
 
        if (ipv4_is_lbcast(fl4->daddr))
@@ -2146,7 +2150,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
        unsigned int flags = 0;
        struct fib_result res;
        struct rtable *rth;
-       int master_idx;
        int orig_oif;
        int err = -ENETUNREACH;
 
@@ -2156,9 +2159,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 
        orig_oif = fl4->flowi4_oif;
 
-       master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
-       if (master_idx)
-               fl4->flowi4_oif = master_idx;
        fl4->flowi4_iif = LOOPBACK_IFINDEX;
        fl4->flowi4_tos = tos & IPTOS_RT_MASK;
        fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
@@ -2242,10 +2242,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                                fl4->saddr = inet_select_addr(dev_out, 0,
                                                              RT_SCOPE_HOST);
                }
-
-               rth = l3mdev_get_rtable(dev_out, fl4);
-               if (rth)
-                       goto out;
        }
 
        if (!fl4->daddr) {
@@ -2263,8 +2259,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
        if (err) {
                res.fi = NULL;
                res.table = NULL;
-               if (fl4->flowi4_oif &&
-                   !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+               if (fl4->flowi4_oif) {
                        /* Apparently, routing tables are wrong. Assume,
                           that the destination is on link.
 
@@ -2300,7 +2295,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                        else
                                fl4->saddr = fl4->daddr;
                }
-               dev_out = net->loopback_dev;
+
+               /* L3 master device is the loopback for that domain */
+               dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
                fl4->flowi4_oif = dev_out->ifindex;
                flags |= RTCF_LOCAL;
                goto make_route;
@@ -2575,9 +2572,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
        fl4.flowi4_mark = mark;
 
-       if (netif_index_is_l3_master(net, fl4.flowi4_oif))
-               fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
        if (iif) {
                struct net_device *dev;
 
index 032a96d..a13fcb3 100644 (file)
@@ -380,7 +380,7 @@ void tcp_init_sock(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
 
-       __skb_queue_head_init(&tp->out_of_order_queue);
+       tp->out_of_order_queue = RB_ROOT;
        tcp_init_xmit_timers(sk);
        tcp_prequeue_init(tp);
        INIT_LIST_HEAD(&tp->tsq_node);
@@ -1570,6 +1570,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 }
 EXPORT_SYMBOL(tcp_read_sock);
 
+int tcp_peek_len(struct socket *sock)
+{
+       return tcp_inq(sock->sk);
+}
+EXPORT_SYMBOL(tcp_peek_len);
+
 /*
  *     This routine copies from a sock struct into the user buffer.
  *
@@ -2237,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        tcp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        tcp_write_queue_purge(sk);
-       __skb_queue_purge(&tp->out_of_order_queue);
+       skb_rbtree_purge(&tp->out_of_order_queue);
 
        inet->inet_dport = 0;
 
@@ -3092,23 +3098,6 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 }
 EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
-                       const struct tcphdr *th)
-{
-       struct scatterlist sg;
-       struct tcphdr hdr;
-
-       /* We are not allowed to change tcphdr, make a local copy */
-       memcpy(&hdr, th, sizeof(hdr));
-       hdr.check = 0;
-
-       /* options aren't included in the hash */
-       sg_init_one(&sg, &hdr, sizeof(hdr));
-       ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
-       return crypto_ahash_update(hp->md5_req);
-}
-EXPORT_SYMBOL(tcp_md5_hash_header);
-
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
                          const struct sk_buff *skb, unsigned int header_len)
 {
@@ -3193,7 +3182,6 @@ int tcp_abort(struct sock *sk, int err)
                        local_bh_enable();
                        return 0;
                }
-               sock_gen_put(sk);
                return -EOPNOTSUPP;
        }
 
@@ -3222,7 +3210,6 @@ int tcp_abort(struct sock *sk, int err)
        bh_unlock_sock(sk);
        local_bh_enable();
        release_sock(sk);
-       sock_put(sk);
        return 0;
 }
 EXPORT_SYMBOL_GPL(tcp_abort);
index 4d61093..a748c74 100644 (file)
@@ -54,11 +54,16 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
 {
        struct net *net = sock_net(in_skb->sk);
        struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+       int err;
 
        if (IS_ERR(sk))
                return PTR_ERR(sk);
 
-       return sock_diag_destroy(sk, ECONNABORTED);
+       err = sock_diag_destroy(sk, ECONNABORTED);
+
+       sock_gen_put(sk);
+
+       return err;
 }
 #endif
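Taken together with the tcp_abort() hunk above, this moves the reference drop
to the function that took it: inet_diag_find_one_icsk() returns a socket with
an extra reference, and the caller now releases it unconditionally instead of
tcp_abort() releasing it on some paths only. Ownership sketch (annotations are
editorial, the calls are as in the diff):

	sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);	/* takes a ref */
	err = sock_diag_destroy(sk, ECONNABORTED);		/* no ref transfer */
	sock_gen_put(sk);					/* drops that ref */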
 
index 54d9f9b..4e777a3 100644 (file)
@@ -150,6 +150,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
        tp->segs_in = 0;
        tcp_segs_in(tp, skb);
        __skb_pull(skb, tcp_hdrlen(skb));
+       sk_forced_mem_schedule(sk, skb->truesize);
        skb_set_owner_r(skb, sk);
 
        TCP_SKB_CB(skb)->seq++;
@@ -226,6 +227,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
        tcp_fastopen_add_skb(child, skb);
 
        tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
+       tp->rcv_wup = tp->rcv_nxt;
        /* tcp_conn_request() is sending the SYNACK,
         * and queues the child into the listener's accept queue.
         */
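Why rcv_wup must follow rcv_nxt here: the advertised receive window is derived
from both fields, roughly win = rcv_wup + rcv_wnd - rcv_nxt, so without this
update the fastopen payload already consumed via tcp_fastopen_add_skb() would
wrongly shrink the window advertised on the SYNACK. The helper, as recalled
from include/net/tcp.h (for reference, not part of this diff):

	static inline u32 tcp_receive_window(const struct tcp_sock *tp)
	{
		s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;

		if (win < 0)
			win = 0;
		return (u32) win;
	}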
index 3ebf45b..dad3e7e 100644 (file)
@@ -4108,7 +4108,7 @@ void tcp_fin(struct sock *sk)
        /* It _is_ possible, that we have something out-of-order _after_ FIN.
         * Probably, we should reset in this case. For now drop them.
         */
-       __skb_queue_purge(&tp->out_of_order_queue);
+       skb_rbtree_purge(&tp->out_of_order_queue);
        if (tcp_is_sack(tp))
                tcp_sack_reset(&tp->rx_opt);
        sk_mem_reclaim(sk);
@@ -4268,7 +4268,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
        int this_sack;
 
        /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
-       if (skb_queue_empty(&tp->out_of_order_queue)) {
+       if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                tp->rx_opt.num_sacks = 0;
                return;
        }
@@ -4344,10 +4344,13 @@ static void tcp_ofo_queue(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        __u32 dsack_high = tp->rcv_nxt;
+       bool fin, fragstolen, eaten;
        struct sk_buff *skb, *tail;
-       bool fragstolen, eaten;
+       struct rb_node *p;
 
-       while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+       p = rb_first(&tp->out_of_order_queue);
+       while (p) {
+               skb = rb_entry(p, struct sk_buff, rbnode);
                if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                        break;
 
@@ -4357,9 +4360,10 @@ static void tcp_ofo_queue(struct sock *sk)
                                dsack_high = TCP_SKB_CB(skb)->end_seq;
                        tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
                }
+               p = rb_next(p);
+               rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
-               __skb_unlink(skb, &tp->out_of_order_queue);
-               if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+               if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
                        SOCK_DEBUG(sk, "ofo packet was already received\n");
                        tcp_drop(sk, skb);
                        continue;
@@ -4371,12 +4375,19 @@ static void tcp_ofo_queue(struct sock *sk)
                tail = skb_peek_tail(&sk->sk_receive_queue);
                eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
                tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+               fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
                if (!eaten)
                        __skb_queue_tail(&sk->sk_receive_queue, skb);
-               if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
-                       tcp_fin(sk);
-               if (eaten)
+               else
                        kfree_skb_partial(skb, fragstolen);
+
+               if (unlikely(fin)) {
+                       tcp_fin(sk);
+                       /* tcp_fin() purges tp->out_of_order_queue,
+                        * so we must end this loop right now.
+                        */
+                       break;
+               }
        }
 }
 
@@ -4392,12 +4403,9 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
                if (tcp_prune_queue(sk) < 0)
                        return -1;
 
-               if (!sk_rmem_schedule(sk, skb, size)) {
+               while (!sk_rmem_schedule(sk, skb, size)) {
                        if (!tcp_prune_ofo_queue(sk))
                                return -1;
-
-                       if (!sk_rmem_schedule(sk, skb, size))
-                               return -1;
                }
        }
        return 0;
@@ -4406,8 +4414,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
+       struct rb_node **p, *q, *parent;
        struct sk_buff *skb1;
        u32 seq, end_seq;
+       bool fragstolen;
 
        tcp_ecn_check_ce(tp, skb);
 
@@ -4422,88 +4432,92 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
        inet_csk_schedule_ack(sk);
 
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+       seq = TCP_SKB_CB(skb)->seq;
+       end_seq = TCP_SKB_CB(skb)->end_seq;
        SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-                  tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+                  tp->rcv_nxt, seq, end_seq);
 
-       skb1 = skb_peek_tail(&tp->out_of_order_queue);
-       if (!skb1) {
+       p = &tp->out_of_order_queue.rb_node;
+       if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                /* Initial out of order segment, build 1 SACK. */
                if (tcp_is_sack(tp)) {
                        tp->rx_opt.num_sacks = 1;
-                       tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-                       tp->selective_acks[0].end_seq =
-                                               TCP_SKB_CB(skb)->end_seq;
+                       tp->selective_acks[0].start_seq = seq;
+                       tp->selective_acks[0].end_seq = end_seq;
                }
-               __skb_queue_head(&tp->out_of_order_queue, skb);
+               rb_link_node(&skb->rbnode, NULL, p);
+               rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+               tp->ooo_last_skb = skb;
                goto end;
        }
 
-       seq = TCP_SKB_CB(skb)->seq;
-       end_seq = TCP_SKB_CB(skb)->end_seq;
-
-       if (seq == TCP_SKB_CB(skb1)->end_seq) {
-               bool fragstolen;
-
-               if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
-                       __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-               } else {
-                       tcp_grow_window(sk, skb);
-                       kfree_skb_partial(skb, fragstolen);
-                       skb = NULL;
-               }
-
-               if (!tp->rx_opt.num_sacks ||
-                   tp->selective_acks[0].end_seq != seq)
-                       goto add_sack;
-
-               /* Common case: data arrive in order after hole. */
-               tp->selective_acks[0].end_seq = end_seq;
-               goto end;
-       }
-
-       /* Find place to insert this segment. */
-       while (1) {
-               if (!after(TCP_SKB_CB(skb1)->seq, seq))
-                       break;
-               if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-                       skb1 = NULL;
-                       break;
-               }
-               skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
-       }
-
-       /* Do skb overlap to previous one? */
-       if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-               if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-                       /* All the bits are present. Drop. */
-                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-                       tcp_drop(sk, skb);
-                       skb = NULL;
-                       tcp_dsack_set(sk, seq, end_seq);
-                       goto add_sack;
+       /* In the typical case, we are adding an skb to the end of the queue.
+        * Use of ooo_last_skb avoids the O(log N) rbtree lookup.
+        */
+       if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+coalesce_done:
+               tcp_grow_window(sk, skb);
+               kfree_skb_partial(skb, fragstolen);
+               skb = NULL;
+               goto add_sack;
+       }
+       /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+       if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
+               parent = &tp->ooo_last_skb->rbnode;
+               p = &parent->rb_right;
+               goto insert;
+       }
+
+       /* Find place to insert this segment. Handle overlaps on the way. */
+       parent = NULL;
+       while (*p) {
+               parent = *p;
+               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+                       p = &parent->rb_left;
+                       continue;
                }
-               if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-                       /* Partial overlap. */
-                       tcp_dsack_set(sk, seq,
-                                     TCP_SKB_CB(skb1)->end_seq);
-               } else {
-                       if (skb_queue_is_first(&tp->out_of_order_queue,
-                                              skb1))
-                               skb1 = NULL;
-                       else
-                               skb1 = skb_queue_prev(
-                                       &tp->out_of_order_queue,
-                                       skb1);
+               if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+                       if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+                               /* All the bits are present. Drop. */
+                               NET_INC_STATS(sock_net(sk),
+                                             LINUX_MIB_TCPOFOMERGE);
+                               __kfree_skb(skb);
+                               skb = NULL;
+                               tcp_dsack_set(sk, seq, end_seq);
+                               goto add_sack;
+                       }
+                       if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+                               /* Partial overlap. */
+                               tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+                       } else {
+                               /* skb's seq == skb1's seq and skb covers skb1.
+                                * Replace skb1 with skb.
+                                */
+                               rb_replace_node(&skb1->rbnode, &skb->rbnode,
+                                               &tp->out_of_order_queue);
+                               tcp_dsack_extend(sk,
+                                                TCP_SKB_CB(skb1)->seq,
+                                                TCP_SKB_CB(skb1)->end_seq);
+                               NET_INC_STATS(sock_net(sk),
+                                             LINUX_MIB_TCPOFOMERGE);
+                               __kfree_skb(skb1);
+                               goto merge_right;
+                       }
+               } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+                       goto coalesce_done;
                }
+               p = &parent->rb_right;
        }
-       if (!skb1)
-               __skb_queue_head(&tp->out_of_order_queue, skb);
-       else
-               __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+insert:
+       /* Insert segment into RB tree. */
+       rb_link_node(&skb->rbnode, parent, p);
+       rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
 
-       /* And clean segments covered by new one as whole. */
-       while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-               skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+merge_right:
+       /* Remove other segments covered by skb. */
+       while ((q = rb_next(&skb->rbnode)) != NULL) {
+               skb1 = rb_entry(q, struct sk_buff, rbnode);
 
                if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
                        break;
@@ -4512,12 +4526,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                                         end_seq);
                        break;
                }
-               __skb_unlink(skb1, &tp->out_of_order_queue);
+               rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
                tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
                                 TCP_SKB_CB(skb1)->end_seq);
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
                tcp_drop(sk, skb1);
        }
+       /* If there is no skb after us, we are the new ooo_last_skb. */
+       if (!q)
+               tp->ooo_last_skb = skb;
 
 add_sack:
        if (tcp_is_sack(tp))
@@ -4654,13 +4671,13 @@ queue_and_out:
                if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                        tcp_fin(sk);
 
-               if (!skb_queue_empty(&tp->out_of_order_queue)) {
+               if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
                        tcp_ofo_queue(sk);
 
                        /* RFC2581. 4.2. SHOULD send immediate ACK, when
                         * gap in queue is filled.
                         */
-                       if (skb_queue_empty(&tp->out_of_order_queue))
+                       if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
                                inet_csk(sk)->icsk_ack.pingpong = 0;
                }
 
@@ -4714,48 +4731,76 @@ drop:
        tcp_data_queue_ofo(sk, skb);
 }
 
+static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
+{
+       if (list)
+               return !skb_queue_is_last(list, skb) ? skb->next : NULL;
+
+       return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+}
+
 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
-                                       struct sk_buff_head *list)
+                                       struct sk_buff_head *list,
+                                       struct rb_root *root)
 {
-       struct sk_buff *next = NULL;
+       struct sk_buff *next = tcp_skb_next(skb, list);
 
-       if (!skb_queue_is_last(list, skb))
-               next = skb_queue_next(list, skb);
+       if (list)
+               __skb_unlink(skb, list);
+       else
+               rb_erase(&skb->rbnode, root);
 
-       __skb_unlink(skb, list);
        __kfree_skb(skb);
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
 
        return next;
 }
 
+/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct sk_buff *skb1;
+
+       while (*p) {
+               parent = *p;
+               skb1 = rb_entry(parent, struct sk_buff, rbnode);
+               if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+       rb_link_node(&skb->rbnode, parent, p);
+       rb_insert_color(&skb->rbnode, root);
+}
+
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
  *
- * If tail is NULL, this means until the end of the list.
+ * If tail is NULL, this means until the end of the queue.
  *
  * Segments with FIN/SYN are not collapsed (only because this
  * simplifies code)
  */
 static void
-tcp_collapse(struct sock *sk, struct sk_buff_head *list,
-            struct sk_buff *head, struct sk_buff *tail,
-            u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+            struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
 {
-       struct sk_buff *skb, *n;
+       struct sk_buff *skb = head, *n;
+       struct sk_buff_head tmp;
        bool end_of_skbs;
 
        /* First, check that queue is collapsible and find
-        * the point where collapsing can be useful. */
-       skb = head;
+        * the point where collapsing can be useful.
+        */
 restart:
-       end_of_skbs = true;
-       skb_queue_walk_from_safe(list, skb, n) {
-               if (skb == tail)
-                       break;
+       for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+               n = tcp_skb_next(skb, list);
+
                /* No new bits? It is possible on ofo queue. */
                if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-                       skb = tcp_collapse_one(sk, skb, list);
+                       skb = tcp_collapse_one(sk, skb, list, root);
                        if (!skb)
                                break;
                        goto restart;
@@ -4773,13 +4818,10 @@ restart:
                        break;
                }
 
-               if (!skb_queue_is_last(list, skb)) {
-                       struct sk_buff *next = skb_queue_next(list, skb);
-                       if (next != tail &&
-                           TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
-                               end_of_skbs = false;
-                               break;
-                       }
+               if (n && n != tail &&
+                   TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+                       end_of_skbs = false;
+                       break;
                }
 
                /* Decided to skip this, advance start seq. */
@@ -4789,17 +4831,22 @@ restart:
            (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
                return;
 
+       __skb_queue_head_init(&tmp);
+
        while (before(start, end)) {
                int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
                struct sk_buff *nskb;
 
                nskb = alloc_skb(copy, GFP_ATOMIC);
                if (!nskb)
-                       return;
+                       break;
 
                memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
                TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-               __skb_queue_before(list, skb, nskb);
+               if (list)
+                       __skb_queue_before(list, skb, nskb);
+               else
+                       __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
                skb_set_owner_r(nskb, sk);
 
                /* Copy data, releasing collapsed skbs. */
@@ -4817,14 +4864,17 @@ restart:
                                start += size;
                        }
                        if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-                               skb = tcp_collapse_one(sk, skb, list);
+                               skb = tcp_collapse_one(sk, skb, list, root);
                                if (!skb ||
                                    skb == tail ||
                                    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
-                                       return;
+                                       goto end;
                        }
                }
        }
+end:
+       skb_queue_walk_safe(&tmp, skb, n)
+               tcp_rbtree_insert(root, skb);
 }
 
 /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
@@ -4833,70 +4883,86 @@ restart:
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
-       struct sk_buff *head;
+       struct sk_buff *skb, *head;
+       struct rb_node *p;
        u32 start, end;
 
-       if (!skb)
+       p = rb_first(&tp->out_of_order_queue);
+       skb = rb_entry_safe(p, struct sk_buff, rbnode);
+new_range:
+       if (!skb) {
+               p = rb_last(&tp->out_of_order_queue);
+               /* Note: p may be NULL here. We deliberately do not
+                * use rb_entry_safe(), as ooo_last_skb is only ever
+                * consulted when the rbtree is not empty.
+                */
+               tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
                return;
-
+       }
        start = TCP_SKB_CB(skb)->seq;
        end = TCP_SKB_CB(skb)->end_seq;
-       head = skb;
-
-       for (;;) {
-               struct sk_buff *next = NULL;
 
-               if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
-                       next = skb_queue_next(&tp->out_of_order_queue, skb);
-               skb = next;
+       for (head = skb;;) {
+               skb = tcp_skb_next(skb, NULL);
 
-               /* Segment is terminated when we see gap or when
-                * we are at the end of all the queue. */
+               /* Range is terminated when we see a gap or when
+                * we are at the queue end.
+                */
                if (!skb ||
                    after(TCP_SKB_CB(skb)->seq, end) ||
                    before(TCP_SKB_CB(skb)->end_seq, start)) {
-                       tcp_collapse(sk, &tp->out_of_order_queue,
+                       tcp_collapse(sk, NULL, &tp->out_of_order_queue,
                                     head, skb, start, end);
-                       head = skb;
-                       if (!skb)
-                               break;
-                       /* Start new segment */
+                       goto new_range;
+               }
+
+               if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
                        start = TCP_SKB_CB(skb)->seq;
+               if (after(TCP_SKB_CB(skb)->end_seq, end))
                        end = TCP_SKB_CB(skb)->end_seq;
-               } else {
-                       if (before(TCP_SKB_CB(skb)->seq, start))
-                               start = TCP_SKB_CB(skb)->seq;
-                       if (after(TCP_SKB_CB(skb)->end_seq, end))
-                               end = TCP_SKB_CB(skb)->end_seq;
-               }
        }
 }
 
 /*
- * Purge the out-of-order queue.
- * Return true if queue was pruned.
+ * Clean the out-of-order queue to make room.
+ * We drop packets with the highest sequences first, to:
+ * 1) give holes a chance to be filled,
+ * 2) avoid adding large latencies when thousands of packets sit there.
+ *    (But if the application shrinks SO_RCVBUF, we could still end up
+ *     freeing the whole queue here.)
+ *
+ * Return true if queue has shrunk.
  */
 static bool tcp_prune_ofo_queue(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       bool res = false;
+       struct rb_node *node, *prev;
 
-       if (!skb_queue_empty(&tp->out_of_order_queue)) {
-               NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
-               __skb_queue_purge(&tp->out_of_order_queue);
+       if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
+               return false;
 
-               /* Reset SACK state.  A conforming SACK implementation will
-                * do the same at a timeout based retransmit.  When a connection
-                * is in a sad state like this, we care only about integrity
-                * of the connection not performance.
-                */
-               if (tp->rx_opt.sack_ok)
-                       tcp_sack_reset(&tp->rx_opt);
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+       node = &tp->ooo_last_skb->rbnode;
+       do {
+               prev = rb_prev(node);
+               rb_erase(node, &tp->out_of_order_queue);
+               tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
                sk_mem_reclaim(sk);
-               res = true;
-       }
-       return res;
+               if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+                   !tcp_under_memory_pressure(sk))
+                       break;
+               node = prev;
+       } while (node);
+       tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+
+       /* Reset SACK state.  A conforming SACK implementation will
+        * do the same at a timeout based retransmit.  When a connection
+        * is in a sad state like this, we care only about integrity
+        * of the connection not performance.
+        */
+       if (tp->rx_opt.sack_ok)
+               tcp_sack_reset(&tp->rx_opt);
+       return true;
 }
 
 /* Reduce allocated memory if we can, trying to get
@@ -4921,7 +4987,7 @@ static int tcp_prune_queue(struct sock *sk)
 
        tcp_collapse_ofo_queue(sk);
        if (!skb_queue_empty(&sk->sk_receive_queue))
-               tcp_collapse(sk, &sk->sk_receive_queue,
+               tcp_collapse(sk, &sk->sk_receive_queue, NULL,
                             skb_peek(&sk->sk_receive_queue),
                             NULL,
                             tp->copied_seq, tp->rcv_nxt);
@@ -5026,7 +5092,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
            /* We ACK each frame or... */
            tcp_in_quickack_mode(sk) ||
            /* We have out of order data. */
-           (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+           (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
                /* Then ack it now */
                tcp_send_ack(sk);
        } else {
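The out-of-order queue conversion above relies on struct sk_buff embedding an
rb_node that overlays the list pointers, so no skb grows in size. The layout,
as recalled from include/linux/skbuff.h of this era (paraphrased sketch):

	union {
		struct {
			struct sk_buff	*next;
			struct sk_buff	*prev;
			/* ... tstamp union ... */
		};
		struct rb_node	rbnode;	/* used by netem, now TCP OOO queue */
	};

This is what turns the worst case for out-of-order floods from an O(N) list
walk per packet into O(log N) rbtree operations, while ooo_last_skb keeps the
common append-at-tail insertion effectively O(1).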
index 32b048e..04b9893 100644 (file)
@@ -814,8 +814,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
                                             tcp_sk(sk)->snd_nxt;
 
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v4_send_ack(sock_net(sk), skb, seq,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp,
                        req->ts_recent,
                        0,
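A quick numeric check of this fix (editorial sketch): with rcv_wscale = 7 and
an actual window of 1 MB, the 16-bit window field must carry
1048576 >> 7 = 8192, and the peer reconstructs 8192 << 7 = 1048576. Sending
the unshifted value, as before, overstates the window 128-fold once the peer
applies its shift.

	u32 rcv_wnd = 1048576;	/* actual window, bytes */
	u8  rcv_wscale = 7;	/* shift negotiated at SYN time */
	u16 on_wire = rcv_wnd >> rcv_wscale;		/* 8192 in the header */
	u32 peer_view = (u32)on_wire << rcv_wscale;	/* 1048576 recovered */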
@@ -1169,6 +1175,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
                                      NULL, skb);
 
        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
                                     &iph->saddr, ntohs(th->source),
                                     &iph->daddr, ntohs(th->dest),
@@ -1531,6 +1538,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_prequeue);
 
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+       u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+
+       /* Only the socket owner can try to collapse/prune rx queues
+        * to reduce memory overhead, so add a little headroom here.
+        * Only a few socket backlogs are likely to be non-empty
+        * at the same time.
+        */
+       limit += 64*1024;
+
+       /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
+        * we can fix skb->truesize to its real value to avoid future drops.
+        * This is valid because skb is not yet charged to the socket.
+        * It has been observed that pure SACK packets were sometimes dropped
+        * (when produced by drivers lacking a copybreak feature).
+        */
+       if (!skb->data_len)
+               skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+
+       if (unlikely(sk_add_backlog(sk, skb, limit))) {
+               bh_unlock_sock(sk);
+               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
+               return true;
+       }
+       return false;
+}
+EXPORT_SYMBOL(tcp_add_backlog);
+
 /*
  *     From tcp_input.c
  */
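The truesize trim in tcp_add_backlog() leans on SKB_TRUESIZE(): for an skb
whose frags were fully pulled into the linear area, the memory charge
collapses to linear capacity plus skb metadata. The macro, as recalled from
include/linux/skbuff.h:

	#define SKB_TRUESIZE(X) ((X) +						\
				 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))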
@@ -1602,6 +1637,7 @@ process:
 
                sk = req->rsk_listener;
                if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+                       sk_drops_add(sk, skb);
                        reqsk_put(req);
                        goto discard_it;
                }
@@ -1660,10 +1696,7 @@ process:
        if (!sock_owned_by_user(sk)) {
                if (!tcp_prequeue(sk, skb))
                        ret = tcp_v4_do_rcv(sk, skb);
-       } else if (unlikely(sk_add_backlog(sk, skb,
-                                          sk->sk_rcvbuf + sk->sk_sndbuf))) {
-               bh_unlock_sock(sk);
-               __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+       } else if (tcp_add_backlog(sk, skb)) {
                goto discard_and_relse;
        }
        bh_unlock_sock(sk);
@@ -1812,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
        tcp_write_queue_purge(sk);
 
        /* Cleans up our, hopefully empty, out_of_order_queue. */
-       __skb_queue_purge(&tp->out_of_order_queue);
+       skb_rbtree_purge(&tp->out_of_order_queue);
 
 #ifdef CONFIG_TCP_MD5SIG
        /* Clean up the MD5 key list, if any */
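
With the out-of-order queue held in an rb-tree, teardown switches from __skb_queue_purge() to skb_rbtree_purge(). A sketch of what that helper presumably does, assuming the usual rb_first()/rb_erase() walk and the rbnode member that sk_buff gains for this series:

static void skb_rbtree_purge_sketch(struct rb_root *root)
{
        struct rb_node *p = rb_first(root);

        while (p) {
                struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

                p = rb_next(p);
                rb_erase(&skb->rbnode, root);
                kfree_skb(skb);
        }
}
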
index b617826..bf1f3b2 100644 (file)
@@ -751,7 +751,7 @@ static struct genl_family tcp_metrics_nl_family = {
        .netnsok        = true,
 };
 
-static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
+static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
        [TCP_METRICS_ATTR_ADDR_IPV4]    = { .type = NLA_U32, },
        [TCP_METRICS_ATTR_ADDR_IPV6]    = { .type = NLA_BINARY,
                                            .len = sizeof(struct in6_addr), },
index 4b95ec4..f63c73d 100644 (file)
@@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                newtp->snd_cwnd_cnt = 0;
 
                tcp_init_xmit_timers(newsk);
-               __skb_queue_head_init(&newtp->out_of_order_queue);
                newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
 
                newtp->rx_opt.saw_tstamp = 0;
index bdaef7f..8b45794 100644 (file)
@@ -2776,7 +2776,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
        max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
        tcp_for_write_queue_from(skb, sk) {
-               __u8 sacked = TCP_SKB_CB(skb)->sacked;
+               __u8 sacked;
                int segs;
 
                if (skb == tcp_send_head(sk))
@@ -2788,6 +2788,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
                segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
                if (segs <= 0)
                        return;
+               sacked = TCP_SKB_CB(skb)->sacked;
                /* In case tcp_shift_skb_data() has aggregated large skbs,
                 * we need to make sure we do not send overly big TSO packets
                 */
index 028eb04..9c5fc97 100644 (file)
@@ -76,7 +76,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
 
        else if (!yeah->doing_reno_now) {
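
Switching tcp_yeah to the tcp_in_slow_start() helper is not purely cosmetic: assuming the helper's usual definition in include/net/tcp.h, it uses a strict comparison, so the cwnd == ssthresh boundary case now falls out of slow start. A tiny demonstration:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct tcp_sock_like { uint32_t snd_cwnd, snd_ssthresh; };

/* The helper's usual definition uses '<', not '<='. */
static bool tcp_in_slow_start(const struct tcp_sock_like *tp)
{
        return tp->snd_cwnd < tp->snd_ssthresh;
}

int main(void)
{
        struct tcp_sock_like tp = { .snd_cwnd = 10, .snd_ssthresh = 10 };

        printf("old test (<=): %d\n", tp.snd_cwnd <= tp.snd_ssthresh); /* 1 */
        printf("helper (<):    %d\n", tcp_in_slow_start(&tp));         /* 0 */
        return 0;
}
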
index e61f7cd..7d96dc2 100644 (file)
 #include <net/busy_poll.h>
 #include "udp_impl.h"
 #include <net/sock_reuseport.h>
+#include <net/addrconf.h>
 
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
@@ -1020,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                                   flow_flags,
                                   faddr, saddr, dport, inet->inet_sport);
 
-               if (!saddr && ipc.oif) {
-                       err = l3mdev_get_saddr(net, ipc.oif, fl4);
-                       if (err < 0)
-                               goto out;
-               }
-
                security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
                rt = ip_route_output_flow(net, fl4, sk);
                if (IS_ERR(rt)) {
@@ -1182,13 +1177,13 @@ out:
  *     @sk: socket
  *
  *     Drops all bad checksum frames, until a valid one is found.
- *     Returns the length of found skb, or 0 if none is found.
+ *     Returns the length of the found skb, or -1 if none is found.
  */
-static unsigned int first_packet_length(struct sock *sk)
+static int first_packet_length(struct sock *sk)
 {
        struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
        struct sk_buff *skb;
-       unsigned int res;
+       int res;
 
        __skb_queue_head_init(&list_kill);
 
@@ -1203,7 +1198,7 @@ static unsigned int first_packet_length(struct sock *sk)
                __skb_unlink(skb, rcvq);
                __skb_queue_tail(&list_kill, skb);
        }
-       res = skb ? skb->len : 0;
+       res = skb ? skb->len : -1;
        spin_unlock_bh(&rcvq->lock);
 
        if (!skb_queue_empty(&list_kill)) {
@@ -1232,7 +1227,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
        case SIOCINQ:
        {
-               unsigned int amount = first_packet_length(sk);
+               int amount = max_t(int, 0, first_packet_length(sk));
 
                return put_user(amount, (int __user *)arg);
        }
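
Returning -1 instead of 0 lets first_packet_length() distinguish an empty receive queue from a valid zero-length datagram, which the old code conflated (so poll() could wrongly report no data). A small sketch of the two call sites' new semantics, with a local stand-in for the kernel's max_t():

#include <stdio.h>

#define max_t(type, a, b) ((type)(a) > (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
        int empty = -1;   /* first_packet_length(): no valid skb queued */
        int zerolen = 0;  /* a valid zero-length UDP datagram */

        /* SIOCINQ reports readable bytes, so the sentinel is clamped... */
        printf("SIOCINQ(empty)   = %d\n", max_t(int, 0, empty));
        printf("SIOCINQ(zerolen) = %d\n", max_t(int, 0, zerolen));

        /* ...while udp_poll() can now tell the two cases apart. */
        printf("poll sees data: %d\n", zerolen != -1);
        return 0;
}
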
@@ -2184,7 +2179,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
        /* Check for false positives due to checksum errors */
        if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
-           !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+           !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
                mask &= ~(POLLIN | POLLRDNORM);
 
        return mask;
@@ -2192,6 +2187,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 }
 EXPORT_SYMBOL(udp_poll);
 
+int udp_abort(struct sock *sk, int err)
+{
+       lock_sock(sk);
+
+       sk->sk_err = err;
+       sk->sk_error_report(sk);
+       udp_disconnect(sk, 0);
+
+       release_sock(sk);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(udp_abort);
+
 struct proto udp_prot = {
        .name              = "UDP",
        .owner             = THIS_MODULE,
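
udp_abort() is wired up as .diag_destroy below, the hook invoked by the sock_diag SOCK_DESTROY command (gated by CONFIG_INET_DIAG_DESTROY and CAP_NET_ADMIN). A simplified sketch of that dispatch path, not the exact sock_diag code:

static int sock_diag_destroy_sketch(struct sock *sk, int err)
{
        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
                return -EPERM;

        if (!sk->sk_prot->diag_destroy)   /* udp_abort for UDP sockets */
                return -EOPNOTSUPP;

        return sk->sk_prot->diag_destroy(sk, err); /* ECONNABORTED here */
}
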
@@ -2216,13 +2225,12 @@ struct proto udp_prot = {
        .sysctl_wmem       = &sysctl_udp_wmem_min,
        .sysctl_rmem       = &sysctl_udp_rmem_min,
        .obj_size          = sizeof(struct udp_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udp_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
-       .clear_sk          = sk_prot_clear_portaddr_nulls,
+       .diag_destroy      = udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
 
index 3d5ccf4..9a89c10 100644 (file)
@@ -20,7 +20,7 @@
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
                        struct netlink_callback *cb,
                        const struct inet_diag_req_v2 *req,
-                       struct nlattr *bc)
+                       struct nlattr *bc, bool net_admin)
 {
        if (!inet_diag_bc_sk(bc, sk))
                return 0;
@@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
        return inet_sk_diag_fill(sk, NULL, skb, req,
                        sk_user_ns(NETLINK_CB(cb->skb).sk),
                        NETLINK_CB(cb->skb).portid,
-                       cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+                       cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin);
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
@@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
        err = inet_sk_diag_fill(sk, NULL, rep, req,
                           sk_user_ns(NETLINK_CB(in_skb).sk),
                           NETLINK_CB(in_skb).portid,
-                          nlh->nlmsg_seq, 0, nlh);
+                          nlh->nlmsg_seq, 0, nlh,
+                          netlink_net_capable(in_skb, CAP_NET_ADMIN));
        if (err < 0) {
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(rep);
@@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
                     struct netlink_callback *cb,
                     const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
+       bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
        struct net *net = sock_net(skb->sk);
        int num, s_num, slot, s_slot;
 
@@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
                            r->id.idiag_dport)
                                goto next;
 
-                       if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+                       if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
                                spin_unlock_bh(&hslot->lock);
                                goto done;
                        }
@@ -165,12 +167,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
        r->idiag_wqueue = sk_wmem_alloc_get(sk);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int __udp_diag_destroy(struct sk_buff *in_skb,
+                             const struct inet_diag_req_v2 *req,
+                             struct udp_table *tbl)
+{
+       struct net *net = sock_net(in_skb->sk);
+       struct sock *sk;
+       int err;
+
+       rcu_read_lock();
+
+       if (req->sdiag_family == AF_INET)
+               sk = __udp4_lib_lookup(net,
+                               req->id.idiag_dst[0], req->id.idiag_dport,
+                               req->id.idiag_src[0], req->id.idiag_sport,
+                               req->id.idiag_if, tbl, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+       else if (req->sdiag_family == AF_INET6) {
+               if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+                   ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+                       sk = __udp4_lib_lookup(net,
+                                       req->id.idiag_dst[3], req->id.idiag_dport,
+                                       req->id.idiag_src[3], req->id.idiag_sport,
+                                       req->id.idiag_if, tbl, NULL);
+
+               else
+                       sk = __udp6_lib_lookup(net,
+                                       (struct in6_addr *)req->id.idiag_dst,
+                                       req->id.idiag_dport,
+                                       (struct in6_addr *)req->id.idiag_src,
+                                       req->id.idiag_sport,
+                                       req->id.idiag_if, tbl, NULL);
+       }
+#endif
+       else {
+               rcu_read_unlock();
+               return -EINVAL;
+       }
+
+       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+               sk = NULL;
+
+       rcu_read_unlock();
+
+       if (!sk)
+               return -ENOENT;
+
+       if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+               sock_put(sk);
+               return -ENOENT;
+       }
+
+       err = sock_diag_destroy(sk, ECONNABORTED);
+
+       sock_put(sk);
+
+       return err;
+}
+
+static int udp_diag_destroy(struct sk_buff *in_skb,
+                           const struct inet_diag_req_v2 *req)
+{
+       return __udp_diag_destroy(in_skb, req, &udp_table);
+}
+
+static int udplite_diag_destroy(struct sk_buff *in_skb,
+                               const struct inet_diag_req_v2 *req)
+{
+       return __udp_diag_destroy(in_skb, req, &udplite_table);
+}
+
+#endif
+
 static const struct inet_diag_handler udp_diag_handler = {
        .dump            = udp_diag_dump,
        .dump_one        = udp_diag_dump_one,
        .idiag_get_info  = udp_diag_get_info,
        .idiag_type      = IPPROTO_UDP,
        .idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+       .destroy         = udp_diag_destroy,
+#endif
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -192,6 +270,9 @@ static const struct inet_diag_handler udplite_diag_handler = {
        .idiag_get_info  = udp_diag_get_info,
        .idiag_type      = IPPROTO_UDPLITE,
        .idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+       .destroy         = udplite_diag_destroy,
+#endif
 };
 
 static int __init udp_diag_init(void)
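
__udp_diag_destroy() above routes v4-mapped IPv6 destinations to the IPv4 lookup, reading the embedded address from the last 32-bit word (idiag_dst[3]). A runnable userspace check of that representation:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int main(void)
{
        struct in6_addr a;
        struct in_addr v4;

        /* ::ffff:192.0.2.1 - an IPv4 address embedded in IPv6 */
        inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
        printf("v4-mapped: %d\n", IN6_IS_ADDR_V4MAPPED(&a));

        /* The embedded IPv4 address occupies the last four bytes,
         * i.e. what the diag code reads as id.idiag_dst[3]. */
        memcpy(&v4.s_addr, &a.s6_addr[12], 4);
        printf("embedded: %s\n", inet_ntoa(v4));
        return 0;
}
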
index 3b3efbd..af81715 100644 (file)
@@ -55,13 +55,11 @@ struct proto        udplite_prot = {
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v4_get_port,
        .obj_size          = sizeof(struct udp_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
-       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
index b644a23..6a7ff69 100644 (file)
@@ -29,7 +29,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
        memset(fl4, 0, sizeof(*fl4));
        fl4->daddr = daddr->a4;
        fl4->flowi4_tos = tos;
-       fl4->flowi4_oif = oif;
+       fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
        if (saddr)
                fl4->saddr = saddr->a4;
 
@@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
        int oif = 0;
 
        if (skb_dst(skb))
-               oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+               oif = skb_dst(skb)->dev->ifindex;
 
        memset(fl4, 0, sizeof(struct flowi4));
        fl4->flowi4_mark = skb->mark;
index df8425f..2f1f5d4 100644 (file)
@@ -778,7 +778,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
        }
 
        if (p == &net->ipv6.devconf_all->forwarding) {
+               int old_dflt = net->ipv6.devconf_dflt->forwarding;
+
                net->ipv6.devconf_dflt->forwarding = newf;
+               if ((!newf) ^ (!old_dflt))
+                       inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+                                                    NETCONFA_IFINDEX_DEFAULT,
+                                                    net->ipv6.devconf_dflt);
+
                addrconf_forward_change(net, newf);
                if ((!newf) ^ (!old))
                        inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
@@ -1872,7 +1879,6 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
-       struct in6_addr addr;
        struct inet6_dev *idev = ifp->idev;
        struct net *net = dev_net(ifp->idev->dev);
 
@@ -1934,18 +1940,6 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
                in6_ifa_put(ifp2);
 lock_errdad:
                spin_lock_bh(&ifp->lock);
-       } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
-               addr.s6_addr32[0] = htonl(0xfe800000);
-               addr.s6_addr32[1] = 0;
-
-               if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
-                   ipv6_addr_equal(&ifp->addr, &addr)) {
-                       /* DAD failed for link-local based on MAC address */
-                       idev->cnf.disable_ipv6 = 1;
-
-                       pr_info("%s: IPv6 being disabled!\n",
-                               ifp->idev->dev->name);
-               }
        }
 
 errdad:
@@ -1954,6 +1948,7 @@ errdad:
        spin_unlock_bh(&ifp->lock);
 
        addrconf_mod_dad_work(ifp, 0);
+       in6_ifa_put(ifp);
 }
 
 /* Join to solicited addr multicast group.
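
The DAD rework adds in6_ifa_hold()/in6_ifa_put() calls so the address's reference count stays balanced across the deferred DAD work. A toy hold/put model of why each hold must be paired with exactly one put (names here are hypothetical, not kernel API):

#include <stdio.h>
#include <stdatomic.h>

struct obj { atomic_int refcnt; };

static void hold(struct obj *o) { atomic_fetch_add(&o->refcnt, 1); }

static void put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refcnt, 1) == 1)
                printf("last reference dropped, object freed\n");
}

int main(void)
{
        struct obj ifp = { 1 };

        hold(&ifp); /* like in6_ifa_hold() before addrconf_dad_stop() */
        put(&ifp);  /* like in6_ifa_put() when the dad work finishes */
        put(&ifp);  /* owner's final reference */
        return 0;
}
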
@@ -3821,6 +3816,7 @@ static void addrconf_dad_work(struct work_struct *w)
                                                dad_work);
        struct inet6_dev *idev = ifp->idev;
        struct in6_addr mcaddr;
+       bool disable_ipv6 = false;
 
        enum {
                DAD_PROCESS,
@@ -3837,6 +3833,24 @@ static void addrconf_dad_work(struct work_struct *w)
        } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
                action = DAD_ABORT;
                ifp->state = INET6_IFADDR_STATE_POSTDAD;
+
+               if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+                   !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
+                       struct in6_addr addr;
+
+                       addr.s6_addr32[0] = htonl(0xfe800000);
+                       addr.s6_addr32[1] = 0;
+
+                       if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+                           ipv6_addr_equal(&ifp->addr, &addr)) {
+                               /* DAD failed for link-local based on MAC */
+                               idev->cnf.disable_ipv6 = 1;
+
+                               pr_info("%s: IPv6 being disabled!\n",
+                                       ifp->idev->dev->name);
+                               disable_ipv6 = true;
+                       }
+               }
        }
        spin_unlock_bh(&ifp->lock);
 
@@ -3844,7 +3858,10 @@ static void addrconf_dad_work(struct work_struct *w)
                addrconf_dad_begin(ifp);
                goto out;
        } else if (action == DAD_ABORT) {
+               in6_ifa_hold(ifp);
                addrconf_dad_stop(ifp, 1);
+               if (disable_ipv6)
+                       addrconf_ifdown(idev->dev, 0);
                goto out;
        }
 
@@ -6017,7 +6034,7 @@ static const struct ctl_table addrconf_sysctl[] = {
 static int __addrconf_sysctl_register(struct net *net, char *dev_name,
                struct inet6_dev *idev, struct ipv6_devconf *p)
 {
-       int i;
+       int i, ifindex;
        struct ctl_table *table;
        char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
 
@@ -6037,6 +6054,13 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
        if (!p->sysctl_header)
                goto free;
 
+       if (!strcmp(dev_name, "all"))
+               ifindex = NETCONFA_IFINDEX_ALL;
+       else if (!strcmp(dev_name, "default"))
+               ifindex = NETCONFA_IFINDEX_DEFAULT;
+       else
+               ifindex = idev->dev->ifindex;
+       inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
        return 0;
 
 free:
index b454055..46ad699 100644 (file)
@@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = {
        .mmap              = sock_no_mmap,
        .sendpage          = inet_sendpage,
        .splice_read       = tcp_splice_read,
+       .read_sock         = tcp_read_sock,
+       .peek_len          = tcp_peek_len,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_sock_common_setsockopt,
        .compat_getsockopt = compat_sock_common_getsockopt,
index 5857c1f..eea23b5 100644 (file)
@@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
                .flags = FIB_LOOKUP_NOREF,
        };
 
+       /* update flow if oif or iif points to a device enslaved to an l3mdev */
+       l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
        fib_rules_lookup(net->ipv6.fib6_rules_ops,
                         flowi6_to_flowi(fl6), flags, &arg);
 
index c8314c6..e50c27a 100644 (file)
@@ -51,7 +51,7 @@ drop:
        return -EINVAL;
 }
 
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
        [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
        [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
 };
index e6eca5f..e604013 100644 (file)
@@ -128,7 +128,7 @@ static struct genl_family ila_nl_family = {
        .parallel_ops   = true,
 };
 
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
        [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
        [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
        [ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
index 771be1f..ef54852 100644 (file)
@@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
                   (info->nlh->nlmsg_flags & NLM_F_CREATE));
        int found = 0;
        bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+       u16 nlflags = NLM_F_EXCL;
        int err;
 
        ins = &fn->leaf;
@@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
                        if (info->nlh &&
                            (info->nlh->nlmsg_flags & NLM_F_EXCL))
                                return -EEXIST;
+
+                       nlflags &= ~NLM_F_EXCL;
                        if (replace) {
                                if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
                                        found++;
@@ -856,6 +859,7 @@ next_iter:
                        pr_warn("NLM_F_CREATE should be set when creating new route\n");
 
 add:
+               nlflags |= NLM_F_CREATE;
                err = fib6_commit_metrics(&rt->dst, mxc);
                if (err)
                        return err;
@@ -864,7 +868,7 @@ add:
                *ins = rt;
                rt->rt6i_node = fn;
                atomic_inc(&rt->rt6i_ref);
-               inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
+               inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
                info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
 
                if (!(fn->fn_flags & RTN_RTINFO)) {
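
The nlflags plumbing lets the RTM_NEWROUTE notification tell listeners how a route was added: NLM_F_CREATE is set on the add path, and NLM_F_EXCL survives only when no matching route already existed. A compact illustration using the uapi flag values:

#include <stdio.h>
#include <linux/netlink.h>

int main(void)
{
        unsigned short nlflags = NLM_F_EXCL;
        int matched = 0;   /* no existing route matched during the walk */

        if (matched)
                nlflags &= ~NLM_F_EXCL;
        nlflags |= NLM_F_CREATE;   /* taken at the add: label */

        printf("create=%d excl=%d\n",
               !!(nlflags & NLM_F_CREATE), !!(nlflags & NLM_F_EXCL));
        return 0;
}
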
index 1dfc402..6001e78 100644 (file)
@@ -56,6 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 #include <net/l3mdev.h>
+#include <net/lwtunnel.h>
 
 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                }
        }
 
+       if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+               int res = lwtunnel_xmit(skb);
+
+               if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+                       return res;
+       }
+
        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
@@ -228,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);
+
+       /* if the egress device is enslaved to an L3 master device, pass the
+                * skb to its handler for processing
+                */
+               skb = l3mdev_ip6_out((struct sock *)sk, skb);
+               if (unlikely(!skb))
+                       return 0;
+
                /* hooks should never assume socket lock is held.
                 * we promote our socket to non const
                 */
@@ -910,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
        int err;
        int flags = 0;
 
-       if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
-           (!*dst || !(*dst)->error)) {
-               err = l3mdev_get_saddr6(net, sk, fl6);
-               if (err)
-                       goto out_err;
-       }
-
        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
@@ -1008,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 out_err_release:
        dst_release(*dst);
        *dst = NULL;
-out_err:
+
        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
@@ -1054,8 +1063,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;
-       if (!fl6->flowi6_oif)
-               fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
        return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
index 2050217..5c57797 100644 (file)
@@ -1174,6 +1174,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                encap_limit = t->parms.encap_limit;
 
        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+       fl6.flowi6_proto = IPPROTO_IPIP;
 
        dsfield = ipv4_get_dsfield(iph);
 
@@ -1233,6 +1234,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                encap_limit = t->parms.encap_limit;
 
        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+       fl6.flowi6_proto = IPPROTO_IPV6;
 
        dsfield = ipv6_get_dsfield(ipv6h);
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
index fe65cdc..d8e6714 100644 (file)
@@ -67,7 +67,6 @@
 #include <net/flow.h>
 #include <net/ip6_checksum.h>
 #include <net/inet_common.h>
-#include <net/l3mdev.h>
 #include <linux/proc_fs.h>
 
 #include <linux/netfilter.h>
@@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
 
        if (!dst) {
                struct flowi6 fl6;
-               int oif = l3mdev_fib_oif(skb->dev);
+               int oif = skb->dev->ifindex;
 
                icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
-               if (oif != skb->dev->ifindex)
-                       fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
                dst = icmp6_dst_alloc(skb->dev, &fl6);
                if (IS_ERR(dst)) {
                        kfree_skb(skb);
@@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
        int rd_len;
        u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
           ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
-       int oif = l3mdev_fib_oif(dev);
        bool ret;
 
        if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
        }
 
        icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
-                        &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
-
-       if (oif != skb->dev->ifindex)
-               fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
+                        &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
 
        dst = ip6_route_output(net, NULL, &fl6);
        if (dst->error) {
index 8dd8696..c1bcf69 100644 (file)
@@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
 
 static int __net_init nf_log_ipv6_net_init(struct net *net)
 {
-       nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
-       return 0;
+       return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
 }
 
 static void __net_exit nf_log_ipv6_net_exit(struct net *net)
index 533cd57..92bda99 100644 (file)
@@ -47,6 +47,7 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = {
        .eval           = nft_reject_ipv6_eval,
        .init           = nft_reject_init,
        .dump           = nft_reject_dump,
+       .validate       = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
index 462f2a7..7cca8ac 100644 (file)
@@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
        ipv6_hdr(skb)->payload_len = htons(len);
        IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
+       /* if the egress device is enslaved to an L3 master device, pass the
+        * skb to its handler for processing
+        */
+       skb = l3mdev_ip6_out(sk, skb);
+       if (unlikely(!skb))
+               return 0;
+
        return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                       net, sk, skb, NULL, skb_dst(skb)->dev,
                       dst_output);
index 0900352..0e983b6 100644 (file)
@@ -126,8 +126,10 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        rt = (struct rt6_info *) dst;
 
        np = inet6_sk(sk);
-       if (!np)
-               return -EBADF;
+       if (!np) {
+               err = -EBADF;
+               goto dst_err_out;
+       }
 
        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
@@ -163,6 +165,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        }
        release_sock(sk);
 
+dst_err_out:
+       dst_release(dst);
+
        if (err)
                return err;
 
index 590dd1f..54404f0 100644 (file)
@@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
        if (err)
                goto error_fault;
 
+       /* if the egress device is enslaved to an L3 master device, pass the
+        * skb to its handler for processing
+        */
+       skb = l3mdev_ip6_out(sk, skb);
+       if (unlikely(!skb))
+               return 0;
+
        IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
        err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
                      NULL, rt->dst.dev, dst_output);
index 4981755..ad4a7ff 100644 (file)
@@ -1164,7 +1164,7 @@ void ip6_route_input(struct sk_buff *skb)
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct ip_tunnel_info *tun_info;
        struct flowi6 fl6 = {
-               .flowi6_iif = l3mdev_fib_oif(skb->dev),
+               .flowi6_iif = skb->dev->ifindex,
                .daddr = iph->daddr,
                .saddr = iph->saddr,
                .flowlabel = ip6_flowinfo(iph),
@@ -1188,12 +1188,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
                                         struct flowi6 *fl6, int flags)
 {
-       struct dst_entry *dst;
        bool any_src;
 
-       dst = l3mdev_get_rt6_dst(net, fl6);
-       if (dst)
-               return dst;
+       if (rt6_need_strict(&fl6->daddr)) {
+               struct dst_entry *dst;
+
+               dst = l3mdev_link_scope_lookup(net, fl6);
+               if (dst)
+                       return dst;
+       }
 
        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
@@ -1604,7 +1607,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
        rcu_read_unlock();
 
 out:
-       return min_t(unsigned int, mtu, IP6_MAX_MTU);
+       mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+       return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static struct dst_entry *icmp6_dst_gc_list;
@@ -2556,8 +2561,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
        u32 tb_id;
        struct net *net = dev_net(idev->dev);
-       struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
-                                           DST_NOCOUNT);
+       struct net_device *dev = net->loopback_dev;
+       struct rt6_info *rt;
+
+       /* use the L3 master device as loopback for host routes if the device
+        * is enslaved and the address is not link-local or multicast
+        */
+       if (!rt6_need_strict(addr))
+               dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+       rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
        if (!rt)
                return ERR_PTR(-ENOMEM);
 
@@ -3336,11 +3349,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        } else {
                fl6.flowi6_oif = oif;
 
-               if (netif_index_is_l3_master(net, oif)) {
-                       fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
-                                          FLOWI_FLAG_SKIP_NH_OIF;
-               }
-
                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
        }
 
index 33df8b8..54cf719 100644 (file)
@@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                      NULL, skb);
 
        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
@@ -817,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
-       else {
-               if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
-                       oif = skb->skb_iif;
-
-               fl6.flowi6_oif = oif;
-       }
+       else
+               fl6.flowi6_oif = oif ? : skb->skb_iif;
 
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
        fl6.fl6_dport = t1->dest;
@@ -944,9 +941,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
+       /* RFC 7323 2.3
+        * The window field (SEG.WND) of every outgoing segment, with the
+        * exception of <SYN> segments, MUST be right-shifted by
+        * Rcv.Wind.Shift bits:
+        */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
                        0, 0);
@@ -1409,6 +1412,7 @@ process:
                sk = req->rsk_listener;
                tcp_v6_fill_cb(skb, hdr, th);
                if (tcp_v6_inbound_md5_hash(sk, skb)) {
+                       sk_drops_add(sk, skb);
                        reqsk_put(req);
                        goto discard_it;
                }
@@ -1465,10 +1469,7 @@ process:
        if (!sock_owned_by_user(sk)) {
                if (!tcp_prequeue(sk, skb))
                        ret = tcp_v6_do_rcv(sk, skb);
-       } else if (unlikely(sk_add_backlog(sk, skb,
-                                          sk->sk_rcvbuf + sk->sk_sndbuf))) {
-               bh_unlock_sock(sk);
-               __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+       } else if (tcp_add_backlog(sk, skb)) {
                goto discard_and_relse;
        }
        bh_unlock_sock(sk);
@@ -1862,17 +1863,6 @@ void tcp6_proc_exit(struct net *net)
 }
 #endif
 
-static void tcp_v6_clear_sk(struct sock *sk, int size)
-{
-       struct inet_sock *inet = inet_sk(sk);
-
-       /* we do not want to clear pinet6 field, because of RCU lookups */
-       sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
-
-       size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
-       memset(&inet->pinet6 + 1, 0, size);
-}
-
 struct proto tcpv6_prot = {
        .name                   = "TCPv6",
        .owner                  = THIS_MODULE,
@@ -1914,7 +1904,6 @@ struct proto tcpv6_prot = {
        .compat_setsockopt      = compat_tcp_setsockopt,
        .compat_getsockopt      = compat_tcp_getsockopt,
 #endif
-       .clear_sk               = tcp_v6_clear_sk,
        .diag_destroy           = tcp_abort,
 };
 
index 81e2f98..9aa7c1c 100644 (file)
@@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net)
 }
 #endif /* CONFIG_PROC_FS */
 
-void udp_v6_clear_sk(struct sock *sk, int size)
-{
-       struct inet_sock *inet = inet_sk(sk);
-
-       /* we do not want to clear pinet6 field, because of RCU lookups */
-       sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
-
-       size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
-       memset(&inet->pinet6 + 1, 0, size);
-}
-
 /* ------------------------------------------------------------------------ */
 
 struct proto udpv6_prot = {
@@ -1460,13 +1449,12 @@ struct proto udpv6_prot = {
        .sysctl_wmem       = &sysctl_udp_wmem_min,
        .sysctl_rmem       = &sysctl_udp_rmem_min,
        .obj_size          = sizeof(struct udp6_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udp_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
-       .clear_sk          = udp_v6_clear_sk,
+       .diag_destroy      = udp_abort,
 };
 
 static struct inet_protosw udpv6_protosw = {
index 0682c03..f6eb1ab 100644 (file)
@@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
-void udp_v6_clear_sk(struct sock *sk, int size);
-
 #ifdef CONFIG_PROC_FS
 int udp6_seq_show(struct seq_file *seq, void *v);
 #endif
index 9cf097e..47d0d2b 100644 (file)
@@ -50,13 +50,11 @@ struct proto udplitev6_prot = {
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v6_get_port,
        .obj_size          = sizeof(struct udp6_sock),
-       .slab_flags        = SLAB_DESTROY_BY_RCU,
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
-       .clear_sk          = udp_v6_clear_sk,
 };
 
 static struct inet_protosw udplite6_protosw = {
index 0eaab1f..00a2d40 100644 (file)
@@ -23,6 +23,7 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 {
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
        XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
        XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
        return xfrm_input(skb, nexthdr, spi, 0);
index 6cc9700..e0f71c0 100644 (file)
@@ -36,7 +36,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
        int err;
 
        memset(&fl6, 0, sizeof(fl6));
-       fl6.flowi6_oif = oif;
+       fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
        fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
        memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
        if (saddr)
@@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
        nexthdr = nh[nhoff];
 
        if (skb_dst(skb))
-               oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+               oif = skb_dst(skb)->dev->ifindex;
 
        memset(fl6, 0, sizeof(struct flowi6));
        fl6->flowi6_mark = skb->mark;
index 8d2f7c9..db63969 100644 (file)
@@ -845,9 +845,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        if (sock->state != SS_UNCONNECTED)
                goto out;
 
-       if ((sk = sock->sk) == NULL)
-               goto out;
-
        err = -EOPNOTSUPP;
        if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
            (sk->sk_type != SOCK_DGRAM))
index 47e4453..bf75c92 100644 (file)
@@ -173,14 +173,24 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
        if (psock->strp.rx_stopped)
                seq_puts(seq, "RxStop ");
 
-       if (psock->strp.rx_paused)
-               seq_puts(seq, "RxPause ");
-
        if (psock->tx_kcm)
                seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
-       if (psock->ready_rx_msg)
-               seq_puts(seq, "RdyRx ");
+       if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+               if (psock->sk->sk_receive_queue.qlen) {
+                       if (psock->strp.rx_need_bytes)
+                               seq_printf(seq, "RxWait=%u ",
+                                          psock->strp.rx_need_bytes);
+                       else
+                               seq_printf(seq, "RxWait ");
+               }
+       } else {
+               if (psock->strp.rx_paused)
+                       seq_puts(seq, "RxPause ");
+
+               if (psock->ready_rx_msg)
+                       seq_puts(seq, "RdyRx ");
+       }
 
        seq_puts(seq, "\n");
 }
index eedbe40..b7f869a 100644 (file)
 #include <linux/socket.h>
 #include <linux/uaccess.h>
 #include <linux/workqueue.h>
+#include <linux/syscalls.h>
 #include <net/kcm.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
-#include <net/tcp.h>
 #include <uapi/linux/kcm.h>
 
 unsigned int kcm_net_id;
@@ -340,7 +340,7 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
 }
 
 /* Lower sock lock held */
-static void psock_tcp_data_ready(struct sock *sk)
+static void psock_data_ready(struct sock *sk)
 {
        struct kcm_psock *psock;
 
@@ -348,7 +348,7 @@ static void psock_tcp_data_ready(struct sock *sk)
 
        psock = (struct kcm_psock *)sk->sk_user_data;
        if (likely(psock))
-               strp_tcp_data_ready(&psock->strp);
+               strp_data_ready(&psock->strp);
 
        read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -392,7 +392,7 @@ static int kcm_read_sock_done(struct strparser *strp, int err)
        return err;
 }
 
-static void psock_tcp_state_change(struct sock *sk)
+static void psock_state_change(struct sock *sk)
 {
        /* TCP only does a POLLIN for a half close. Do a POLLHUP here
         * since application will normally not poll with POLLIN
@@ -402,7 +402,7 @@ static void psock_tcp_state_change(struct sock *sk)
        report_csk_error(sk, EPIPE);
 }
 
-static void psock_tcp_write_space(struct sock *sk)
+static void psock_write_space(struct sock *sk)
 {
        struct kcm_psock *psock;
        struct kcm_mux *mux;
@@ -1383,19 +1383,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        struct list_head *head;
        int index = 0;
        struct strp_callbacks cb;
-
-       if (csock->ops->family != PF_INET &&
-           csock->ops->family != PF_INET6)
-               return -EINVAL;
+       int err;
 
        csk = csock->sk;
        if (!csk)
                return -EINVAL;
 
-       /* Only support TCP for now */
-       if (csk->sk_protocol != IPPROTO_TCP)
-               return -EINVAL;
-
        psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
        if (!psock)
                return -ENOMEM;
@@ -1409,7 +1402,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        cb.parse_msg = kcm_parse_func_strparser;
        cb.read_sock_done = kcm_read_sock_done;
 
-       strp_init(&psock->strp, csk, &cb);
+       err = strp_init(&psock->strp, csk, &cb);
+       if (err) {
+               kmem_cache_free(kcm_psockp, psock);
+               return err;
+       }
 
        sock_hold(csk);
 
@@ -1418,9 +1415,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
        psock->save_write_space = csk->sk_write_space;
        psock->save_state_change = csk->sk_state_change;
        csk->sk_user_data = psock;
-       csk->sk_data_ready = psock_tcp_data_ready;
-       csk->sk_write_space = psock_tcp_write_space;
-       csk->sk_state_change = psock_tcp_state_change;
+       csk->sk_data_ready = psock_data_ready;
+       csk->sk_write_space = psock_write_space;
+       csk->sk_state_change = psock_state_change;
        write_unlock_bh(&csk->sk_callback_lock);
 
        /* Finished initialization, now add the psock to the MUX. */
@@ -1477,12 +1474,13 @@ out:
        return err;
 }
 
-/* Lower socket lock held */
 static void kcm_unattach(struct kcm_psock *psock)
 {
        struct sock *csk = psock->sk;
        struct kcm_mux *mux = psock->mux;
 
+       lock_sock(csk);
+
        /* Stop getting callbacks from TCP socket. After this there should
         * be no way to reserve a kcm for this psock.
         */
@@ -1514,7 +1512,10 @@ static void kcm_unattach(struct kcm_psock *psock)
 
        write_unlock_bh(&csk->sk_callback_lock);
 
+       /* Call strp_done without sock lock */
+       release_sock(csk);
        strp_done(&psock->strp);
+       lock_sock(csk);
 
        bpf_prog_put(psock->bpf_prog);
 
@@ -1564,6 +1565,8 @@ no_reserved:
                fput(csk->sk_socket->file);
                kmem_cache_free(kcm_psockp, psock);
        }
+
+       release_sock(csk);
 }
 
 static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
@@ -1719,7 +1722,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                        if (copy_to_user((void __user *)arg, &info,
                                         sizeof(info))) {
                                err = -EFAULT;
-                               sock_release(newsock);
+                               sys_close(info.fd);
                        }
                }
 
@@ -1749,11 +1752,8 @@ static void release_mux(struct kcm_mux *mux)
        /* Release psocks */
        list_for_each_entry_safe(psock, tmp_psock,
                                 &mux->psocks, psock_list) {
-               if (!WARN_ON(psock->unattaching)) {
-                       lock_sock(psock->strp.sk);
+               if (!WARN_ON(psock->unattaching))
                        kcm_unattach(psock);
-                       release_sock(psock->strp.sk);
-               }
        }
 
        if (WARN_ON(mux->psocks_cnt))
index 1e40dac..a2ed3bd 100644 (file)
@@ -1855,6 +1855,9 @@ static __net_exit void l2tp_exit_net(struct net *net)
                (void)l2tp_tunnel_delete(tunnel);
        }
        rcu_read_unlock_bh();
+
+       flush_workqueue(l2tp_wq);
+       rcu_barrier();
 }
 
 static struct pernet_operations l2tp_net_ops = {
index 5871537..2599af6 100644 (file)
@@ -139,7 +139,7 @@ struct l2tp_session {
        void (*session_close)(struct l2tp_session *session);
        void (*ref)(struct l2tp_session *session);
        void (*deref)(struct l2tp_session *session);
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
        void (*show)(struct seq_file *m, void *priv);
 #endif
        uint8_t                 priv[0];        /* private data */
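
The defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) pairs collapse into IS_ENABLED(), which is true for both =y and =m builds. A minimal re-creation of the kconfig.h macro trick, shown under the assumption that it matches the kernel's implementation:

#include <stdio.h>

/* Kconfig defines CONFIG_FOO to 1 for =y and CONFIG_FOO_MODULE for =m. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED(option) (__is_defined(option) || __is_defined(option##_MODULE))

#define CONFIG_L2TP_DEBUGFS_MODULE 1   /* pretend the driver is =m */

int main(void)
{
        printf("IS_ENABLED: %d\n", IS_ENABLED(CONFIG_L2TP_DEBUGFS)); /* 1 */
        return 0;
}
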
index 57fc5a4..ef2cd30 100644 (file)
@@ -195,7 +195,7 @@ static void l2tp_eth_delete(struct l2tp_session *session)
        }
 }
 
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 static void l2tp_eth_show(struct seq_file *m, void *arg)
 {
        struct l2tp_session *session = arg;
@@ -268,7 +268,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
        priv->tunnel_sock = tunnel->sock;
        session->recv_skb = l2tp_eth_dev_recv;
        session->session_close = l2tp_eth_delete;
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
        session->show = l2tp_eth_show;
 #endif
 
index 1d02e8d..bf31177 100644 (file)
@@ -867,7 +867,7 @@ out:
        return skb->len;
 }
 
-static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
+static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
        [L2TP_ATTR_NONE]                = { .type = NLA_UNSPEC, },
        [L2TP_ATTR_PW_TYPE]             = { .type = NLA_U16, },
        [L2TP_ATTR_ENCAP_TYPE]          = { .type = NLA_U16, },
index d9560aa..41d47bf 100644 (file)
@@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
        if (!pskb_may_pull(skb, 2))
                return 1;
 
-       if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+       if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
                skb_pull(skb, 2);
 
        return 0;
@@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
                            size_t total_len)
 {
-       static const unsigned char ppph[2] = { 0xff, 0x03 };
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int error;
@@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
        error = -ENOMEM;
        skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
                           uhlen + session->hdr_len +
-                          sizeof(ppph) + total_len,
+                          2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
                           0, GFP_KERNEL);
        if (!skb)
                goto error_put_sess_tun;
@@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
        skb_reserve(skb, uhlen);
 
        /* Add PPP header */
-       skb->data[0] = ppph[0];
-       skb->data[1] = ppph[1];
+       skb->data[0] = PPP_ALLSTATIONS;
+       skb->data[1] = PPP_UI;
        skb_put(skb, 2);
 
        /* Copy user data into skb */
@@ -369,7 +368,6 @@ error:
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-       static const u8 ppph[2] = { 0xff, 0x03 };
        struct sock *sk = (struct sock *) chan->private;
        struct sock *sk_tun;
        struct l2tp_session *session;
@@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
                   sizeof(struct iphdr) + /* IP header */
                   uhlen +              /* UDP header (if L2TP_ENCAPTYPE_UDP) */
                   session->hdr_len +   /* L2TP header */
-                  sizeof(ppph);        /* PPP header */
+                  2;                   /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
        if (skb_cow_head(skb, headroom))
                goto abort_put_sess_tun;
 
        /* Setup PPP header */
-       __skb_push(skb, sizeof(ppph));
-       skb->data[0] = ppph[0];
-       skb->data[1] = ppph[1];
+       __skb_push(skb, 2);
+       skb->data[0] = PPP_ALLSTATIONS;
+       skb->data[1] = PPP_UI;
 
        local_bh_disable();
        l2tp_xmit_skb(session, skb, session->hdr_len);
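
The magic 0xff/0x03 pair being replaced here are the HDLC address and control bytes that every PPP frame starts with; the uapi header <linux/ppp_defs.h> already names them. A trivial check:

#include <stdio.h>
#include <linux/ppp_defs.h>

int main(void)
{
        unsigned char hdr[2] = { PPP_ALLSTATIONS, PPP_UI };

        printf("address=0x%02x control=0x%02x\n", hdr[0], hdr[1]); /* ff 03 */
        return 0;
}
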
@@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session *session)
        BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
        if (sock) {
-               inet_shutdown(sock, 2);
+               inet_shutdown(sock, SEND_SHUTDOWN);
                /* Don't let the session go away before our socket does */
                l2tp_session_inc_refcount(session);
        }
@@ -554,7 +552,7 @@ out:
        return error;
 }
 
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 static void pppol2tp_show(struct seq_file *m, void *arg)
 {
        struct l2tp_session *session = arg;
@@ -725,7 +723,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
        session->recv_skb       = pppol2tp_recv;
        session->session_close  = pppol2tp_session_close;
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
        session->show           = pppol2tp_show;
 #endif
 
@@ -856,7 +854,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
        error = -ENOTCONN;
        if (sk == NULL)
                goto end;
-       if (sk->sk_state != PPPOX_CONNECTED)
+       if (!(sk->sk_state & PPPOX_CONNECTED))
                goto end;
 
        error = -EBADF;
index c4a1c3e..8da86ce 100644 (file)
@@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
 
 /**
- *     l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns
- *                          cached route for L3 master device if relevant
- *                          to flow
+ *     l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link
+ *                          local and multicast addresses
  *     @net: network namespace for device index lookup
  *     @fl6: IPv6 flow struct for lookup
  */
 
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
-                                    struct flowi6 *fl6)
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
+                                          struct flowi6 *fl6)
 {
        struct dst_entry *dst = NULL;
        struct net_device *dev;
@@ -121,70 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
                        dev = netdev_master_upper_dev_get_rcu(dev);
 
                if (dev && netif_is_l3_master(dev) &&
-                   dev->l3mdev_ops->l3mdev_get_rt6_dst)
-                       dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
+                   dev->l3mdev_ops->l3mdev_link_scope_lookup)
+                       dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
 
                rcu_read_unlock();
        }
 
        return dst;
 }
-EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
-
-/**
- *     l3mdev_get_saddr - get source address for a flow based on an interface
- *                        enslaved to an L3 master device
- *     @net: network namespace for device index lookup
- *     @ifindex: Interface index
- *     @fl4: IPv4 flow struct
- */
-
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
-{
-       struct net_device *dev;
-       int rc = 0;
-
-       if (ifindex) {
-               rcu_read_lock();
-
-               dev = dev_get_by_index_rcu(net, ifindex);
-               if (dev && netif_is_l3_slave(dev))
-                       dev = netdev_master_upper_dev_get_rcu(dev);
-
-               if (dev && netif_is_l3_master(dev) &&
-                   dev->l3mdev_ops->l3mdev_get_saddr)
-                       rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
-
-               rcu_read_unlock();
-       }
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
-
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-                     struct flowi6 *fl6)
-{
-       struct net_device *dev;
-       int rc = 0;
-
-       if (fl6->flowi6_oif) {
-               rcu_read_lock();
-
-               dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
-               if (dev && netif_is_l3_slave(dev))
-                       dev = netdev_master_upper_dev_get_rcu(dev);
-
-               if (dev && netif_is_l3_master(dev) &&
-                   dev->l3mdev_ops->l3mdev_get_saddr6)
-                       rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6);
-
-               rcu_read_unlock();
-       }
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
+EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup);
 
 /**
  *     l3mdev_fib_rule_match - Determine if flowi references an
@@ -222,3 +166,38 @@ out:
 
        return rc;
 }
+
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+       struct net_device *dev;
+       int ifindex;
+
+       rcu_read_lock();
+
+       if (fl->flowi_oif) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+               if (dev) {
+                       ifindex = l3mdev_master_ifindex_rcu(dev);
+                       if (ifindex) {
+                               fl->flowi_oif = ifindex;
+                               fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+                               goto out;
+                       }
+               }
+       }
+
+       if (fl->flowi_iif) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+               if (dev) {
+                       ifindex = l3mdev_master_ifindex_rcu(dev);
+                       if (ifindex) {
+                               fl->flowi_iif = ifindex;
+                               fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+                       }
+               }
+       }
+
+out:
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(l3mdev_update_flow);
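
fib6_rule_lookup(), earlier in this series, calls l3mdev_update_flow() before the rules walk, so a flow whose oif or iif is enslaved to a VRF gets steered to the master device. A toy model of the oif rewrite; the flag value below is illustrative, not taken from include/net/flow.h:

#include <stdio.h>

#define FLOWI_FLAG_SKIP_NH_OIF 0x04   /* illustrative value */

struct flow { int oif; unsigned int flags; };

static int master_of(int ifindex)
{
        return ifindex == 5 ? 3 : 0;  /* pretend ifindex 5 is enslaved to 3 */
}

int main(void)
{
        struct flow fl = { .oif = 5 };
        int master = master_of(fl.oif);

        if (master) {
                fl.oif = master;
                fl.flags |= FLOWI_FLAG_SKIP_NH_OIF;
        }
        printf("oif=%d skip_nh_oif=%d\n",
               fl.oif, !!(fl.flags & FLOWI_FLAG_SKIP_NH_OIF));
        return 0;
}
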
index b5d28f1..afca7d1 100644 (file)
@@ -333,10 +333,11 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
        if (!uc.center_freq1)
                return;
 
-       /* proceed to downgrade the chandef until usable or the same */
+       /* proceed to downgrade the chandef until usable or the same as AP BW */
        while (uc.width > max_width ||
-              !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
-                                             sdata->wdev.iftype))
+              (uc.width > sta->tdls_chandef.width &&
+               !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
+                                              sdata->wdev.iftype)))
                ieee80211_chandef_downgrade(&uc);
 
        if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
index 5c161e7..0e4334c 100644 (file)
@@ -961,9 +961,6 @@ static void mpls_ifdown(struct net_device *dev, int event)
                                RCU_INIT_POINTER(nh->nh_dev, NULL);
                } endfor_nexthops(rt);
        }
-
-
-       return;
 }
 
 static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
@@ -997,8 +994,6 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
 
                ACCESS_ONCE(rt->rt_nhn_alive) = alive;
        }
-
-       return;
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
index 2055e57..b4da6d8 100644 (file)
@@ -23,32 +23,50 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
                                       netdev_features_t features)
 {
        struct sk_buff *segs = ERR_PTR(-EINVAL);
+       u16 mac_offset = skb->mac_header;
        netdev_features_t mpls_features;
+       u16 mac_len = skb->mac_len;
        __be16 mpls_protocol;
+       unsigned int mpls_hlen;
+
+       skb_reset_network_header(skb);
+       mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+       if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
+               goto out;
 
        /* Setup inner SKB. */
        mpls_protocol = skb->protocol;
        skb->protocol = skb->inner_protocol;
 
-       /* Push back the mac header that skb_mac_gso_segment() has pulled.
-        * It will be re-pulled by the call to skb_mac_gso_segment() below
-        */
-       __skb_push(skb, skb->mac_len);
+       __skb_pull(skb, mpls_hlen);
+
+       skb->mac_len = 0;
+       skb_reset_mac_header(skb);
 
        /* Segment inner packet. */
        mpls_features = skb->dev->mpls_features & features;
        segs = skb_mac_gso_segment(skb, mpls_features);
+       if (IS_ERR_OR_NULL(segs)) {
+               skb_gso_error_unwind(skb, mpls_protocol, mpls_hlen, mac_offset,
+                                    mac_len);
+               goto out;
+       }
+       skb = segs;
+
+       mpls_hlen += mac_len;
+       do {
+               skb->mac_len = mac_len;
+               skb->protocol = mpls_protocol;
 
+               skb_reset_inner_network_header(skb);
 
-       /* Restore outer protocol. */
-       skb->protocol = mpls_protocol;
+               __skb_push(skb, mpls_hlen);
 
-       /* Re-pull the mac header that the call to skb_mac_gso_segment()
-        * above pulled.  It will be re-pushed after returning
-        * skb_mac_gso_segment(), an indirect caller of this function.
-        */
-       __skb_pull(skb, skb->data - skb_mac_header(skb));
+               skb_reset_mac_header(skb);
+               skb_set_network_header(skb, mac_len);
+       } while ((skb = skb->next));
 
+out:
        return segs;
 }
 
index 644a8da..cf52cf3 100644 (file)
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
        return en->labels * sizeof(struct mpls_shim_hdr);
 }
 
-static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int mpls_xmit(struct sk_buff *skb)
 {
        struct mpls_iptunnel_encap *tun_encap_info;
        struct mpls_shim_hdr *hdr;
@@ -90,7 +90,11 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
        if (skb_cow(skb, hh_len + new_header_size))
                goto drop;
 
+       skb_set_inner_protocol(skb, skb->protocol);
+       skb_reset_inner_network_header(skb);
+
        skb_push(skb, new_header_size);
+
        skb_reset_network_header(skb);
 
        skb->dev = out_dev;
@@ -115,7 +119,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                net_dbg_ratelimited("%s: packet transmission failed: %d\n",
                                    __func__, err);
 
-       return 0;
+       return LWTUNNEL_XMIT_DONE;
 
 drop:
        kfree_skb(skb);
@@ -153,7 +157,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
        if (ret)
                goto errout;
        newts->type = LWTUNNEL_ENCAP_MPLS;
-       newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+       newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+       newts->headroom = mpls_encap_size(tun_encap_info);
 
        *ts = newts;
 
@@ -209,7 +214,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 
 static const struct lwtunnel_encap_ops mpls_iptun_ops = {
        .build_state = mpls_build_state,
-       .output = mpls_output,
+       .xmit = mpls_xmit,
        .fill_encap = mpls_fill_encap_info,
        .get_encap_size = mpls_encap_nlsize,
        .cmp_encap = mpls_encap_cmp,
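
[Sketch of how the stack consumes the new .xmit hook, modeled on the
lwtunnel output-path change in this same series; the call site is
simplified for illustration.]

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }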
index 9266cee..e8d56d9 100644 (file)
@@ -474,6 +474,12 @@ config NFT_META
          This option adds the "meta" expression that you can use to match and
          to set packet metainformation such as the packet mark.
 
+config NFT_NUMGEN
+       tristate "Netfilter nf_tables number generator module"
+       help
+         This option adds the number generator expression used to generate
+         incremental counters and random numbers bounded by an upper limit.
+
 config NFT_CT
        depends on NF_CONNTRACK
        tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@ config NFT_CT
          This option adds the "meta" expression that you can use to match
          connection tracking information such as the flow state.
 
-config NFT_RBTREE
+config NFT_SET_RBTREE
        tristate "Netfilter nf_tables rbtree set module"
        help
          This option adds the "rbtree" set type (Red Black tree) that is used
          to build interval-based sets.
 
-config NFT_HASH
+config NFT_SET_HASH
        tristate "Netfilter nf_tables hash set module"
        help
          This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@ config NFT_QUEUE
          This is required if you intend to use the userspace queueing
          infrastructure (also known as NFQUEUE) from nftables.
 
+config NFT_QUOTA
+       tristate "Netfilter nf_tables quota module"
+       help
+         This option adds the "quota" expression that you can use to match
+         and enforce byte quotas.
+
 config NFT_REJECT
        default m if NETFILTER_ADVANCED=n
        tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@ config NFT_COMPAT
          x_tables match/target extensions over the nf_tables
          framework.
 
+config NFT_HASH
+       tristate "Netfilter nf_tables hash module"
+       help
+         This option adds the "hash" expression that you can use to perform
+         a hash operation on registers.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
index 6913454..0c85811 100644 (file)
@@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV)      += nf_tables_netdev.o
 obj-$(CONFIG_NFT_COMPAT)       += nft_compat.o
 obj-$(CONFIG_NFT_EXTHDR)       += nft_exthdr.o
 obj-$(CONFIG_NFT_META)         += nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN)       += nft_numgen.o
 obj-$(CONFIG_NFT_CT)           += nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)                += nft_limit.o
 obj-$(CONFIG_NFT_NAT)          += nft_nat.o
 obj-$(CONFIG_NFT_QUEUE)                += nft_queue.o
+obj-$(CONFIG_NFT_QUOTA)                += nft_quota.o
 obj-$(CONFIG_NFT_REJECT)       += nft_reject.o
 obj-$(CONFIG_NFT_REJECT_INET)  += nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE)       += nft_rbtree.o
-obj-$(CONFIG_NFT_HASH)         += nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE)   += nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH)     += nft_set_hash.o
 obj-$(CONFIG_NFT_COUNTER)      += nft_counter.o
 obj-$(CONFIG_NFT_LOG)          += nft_log.o
 obj-$(CONFIG_NFT_MASQ)         += nft_masq.o
 obj-$(CONFIG_NFT_REDIR)                += nft_redir.o
+obj-$(CONFIG_NFT_HASH)         += nft_hash.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)   += nft_dup_netdev.o
index f04fd8d..fc230d9 100644 (file)
@@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
        h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
-               /* Show what happens instead of calling nf_ct_kill() */
-               if (del_timer(&ct->timeout)) {
-                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+               if (nf_ct_kill(ct)) {
+                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
                                FMT_TUPLE "\n",
                                __func__, ct, ARG_TUPLE(&tuple));
-                       if (ct->timeout.function)
-                               ct->timeout.function(ct->timeout.data);
                } else {
                        IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
                                FMT_TUPLE "\n",
index dd2c43a..ac1db40 100644 (file)
@@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
+struct conntrack_gc_work {
+       struct delayed_work     dwork;
+       u32                     last_bucket;
+       bool                    exiting;
+};
+
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
+#define GC_MAX_BUCKETS_DIV     64u
+#define GC_MAX_BUCKETS         8192u
+#define GC_INTERVAL            (5 * HZ)
+#define GC_MAX_EVICTS          256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
        spin_lock(lock);
@@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
 
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
        pr_debug("destroy_conntrack(%p)\n", ct);
        NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
-       NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
        if (unlikely(nf_ct_is_template(ct))) {
                nf_ct_tmpl_free(ct);
@@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 {
        struct nf_conn_tstamp *tstamp;
 
+       if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+               return false;
+
        tstamp = nf_conn_tstamp_find(ct);
        if (tstamp && tstamp->stop == 0)
                tstamp->stop = ktime_get_real_ns();
 
-       if (nf_ct_is_dying(ct))
-               goto delete;
-
        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
                                    portid, report) < 0) {
-               /* destroy event was not delivered */
+               /* destroy event was not delivered. nf_ct_put will
+                * be done by the event cache worker on redelivery.
+                */
                nf_ct_delete_from_lists(ct);
                nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
                return false;
        }
 
        nf_conntrack_ecache_work(nf_ct_net(ct));
-       set_bit(IPS_DYING_BIT, &ct->status);
- delete:
        nf_ct_delete_from_lists(ct);
        nf_ct_put(ct);
        return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_delete);
 
-static void death_by_timeout(unsigned long ul_conntrack)
-{
-       nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
                const struct nf_conntrack_tuple *tuple,
@@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
               net_eq(net, nf_ct_net(ct));
 }
 
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
 {
-       struct hlist_nulls_head *hptr;
-       unsigned int sequence, hsz;
+       if (!atomic_inc_not_zero(&ct->ct_general.use))
+               return;
 
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               hsz = nf_conntrack_htable_size;
-               hptr = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+       if (nf_ct_should_gc(ct))
+               nf_ct_kill(ct);
 
-       *hash = hptr;
-       *hsize = hsz;
+       nf_ct_put(ct);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
 
 /*
  * Warning :
@@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_head *ct_hash;
        struct hlist_nulls_node *n;
-       unsigned int bucket, sequence;
+       unsigned int bucket, hsize;
 
 begin:
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               bucket = scale_hash(hash);
-               ct_hash = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+       nf_conntrack_get_ht(&ct_hash, &hsize);
+       bucket = reciprocal_scale(hash, hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+               struct nf_conn *ct;
+
+               ct = nf_ct_tuplehash_to_ctrack(h);
+               if (nf_ct_is_expired(ct)) {
+                       nf_ct_gc_expired(ct);
+                       continue;
+               }
+
+               if (nf_ct_is_dying(ct))
+                       continue;
+
                if (nf_ct_key_equal(h, tuple, zone, net)) {
                        NF_CT_STAT_INC_ATOMIC(net, found);
                        return h;
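
[reciprocal_scale() used above comes from <linux/kernel.h>; it maps a 32-bit
hash into [0, hsize) with a multiply and shift instead of a modulo.
Equivalent sketch:]

        static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
        {
                return (u32)(((u64)val * ep_ro) >> 32);
        }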
@@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
                                    zone, net))
                        goto out;
 
-       add_timer(&ct->timeout);
        smp_wmb();
        /* The caller holds a reference to this object */
        atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        /* Timer relative to confirmation time, not original
           setting time, otherwise we'd get timer wrap in
           weird delay cases. */
-       ct->timeout.expires += jiffies;
-       add_timer(&ct->timeout);
+       ct->timeout += nfct_time_stamp;
        atomic_inc(&ct->ct_general.use);
        ct->status |= IPS_CONFIRMED;
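
[With the per-conntrack timer gone, ct->timeout is now an absolute 32-bit
jiffies stamp (nfct_time_stamp). Expiry becomes a signed comparison; a
sketch matching nf_ct_is_expired() as added in this series:]

        static inline bool nf_ct_is_expired(const struct nf_conn *ct)
        {
                return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
        }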
 
@@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
        const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_head *ct_hash;
-       unsigned int hash, sequence;
+       unsigned int hash, hsize;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;
 
        zone = nf_ct_zone(ignored_conntrack);
 
        rcu_read_lock();
-       do {
-               sequence = read_seqcount_begin(&nf_conntrack_generation);
-               hash = hash_conntrack(net, tuple);
-               ct_hash = nf_conntrack_hash;
-       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+       nf_conntrack_get_ht(&ct_hash, &hsize);
+       hash = __hash_conntrack(net, tuple, hsize);
 
        hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
-               if (ct != ignored_conntrack &&
-                   nf_ct_key_equal(h, tuple, zone, net)) {
+
+               if (ct == ignored_conntrack)
+                       continue;
+
+               if (nf_ct_is_expired(ct)) {
+                       nf_ct_gc_expired(ct);
+                       continue;
+               }
+
+               if (nf_ct_key_equal(h, tuple, zone, net)) {
                        NF_CT_STAT_INC_ATOMIC(net, found);
                        rcu_read_unlock();
                        return 1;
                }
                NF_CT_STAT_INC_ATOMIC(net, searched);
        }
+
+       if (get_nulls_value(n) != hash) {
+               NF_CT_STAT_INC_ATOMIC(net, search_restart);
+               goto begin;
+       }
+
        rcu_read_unlock();
 
        return 0;
@@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net,
        hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
                tmp = nf_ct_tuplehash_to_ctrack(h);
 
+               if (nf_ct_is_expired(tmp)) {
+                       nf_ct_gc_expired(tmp);
+                       continue;
+               }
+
                if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
                    !net_eq(nf_ct_net(tmp), net) ||
                    nf_ct_is_dying(tmp))
@@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net,
                 */
                if (net_eq(nf_ct_net(tmp), net) &&
                    nf_ct_is_confirmed(tmp) &&
-                   del_timer(&tmp->timeout) &&
                    nf_ct_delete(tmp, 0, 0))
                        drops++;
 
@@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 
        for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
                struct hlist_nulls_head *ct_hash;
-               unsigned hash, sequence, drops;
+               unsigned int hash, hsize, drops;
 
                rcu_read_lock();
-               do {
-                       sequence = read_seqcount_begin(&nf_conntrack_generation);
-                       hash = scale_hash(_hash++);
-                       ct_hash = nf_conntrack_hash;
-               } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+               nf_conntrack_get_ht(&ct_hash, &hsize);
+               hash = reciprocal_scale(_hash++, hsize);
 
                drops = early_drop_list(net, &ct_hash[hash]);
                rcu_read_unlock();
@@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
        return false;
 }
 
+static void gc_worker(struct work_struct *work)
+{
+       unsigned int i, goal, buckets = 0, expired_count = 0;
+       unsigned long next_run = GC_INTERVAL;
+       unsigned int ratio, scanned = 0;
+       struct conntrack_gc_work *gc_work;
+
+       gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+       goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+       i = gc_work->last_bucket;
+
+       do {
+               struct nf_conntrack_tuple_hash *h;
+               struct hlist_nulls_head *ct_hash;
+               struct hlist_nulls_node *n;
+               unsigned int hashsz;
+               struct nf_conn *tmp;
+
+               i++;
+               rcu_read_lock();
+
+               nf_conntrack_get_ht(&ct_hash, &hashsz);
+               if (i >= hashsz)
+                       i = 0;
+
+               hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+                       tmp = nf_ct_tuplehash_to_ctrack(h);
+
+                       scanned++;
+                       if (nf_ct_is_expired(tmp)) {
+                               nf_ct_gc_expired(tmp);
+                               expired_count++;
+                               continue;
+                       }
+               }
+
+               /* could check get_nulls_value() here and restart if ct
+                * was moved to another chain.  But given gc is best-effort
+                * we will just continue with the next hash slot.
+                */
+               rcu_read_unlock();
+               cond_resched_rcu_qs();
+       } while (++buckets < goal &&
+                expired_count < GC_MAX_EVICTS);
+
+       if (gc_work->exiting)
+               return;
+
+       ratio = scanned ? expired_count * 100 / scanned : 0;
+       if (ratio >= 90)
+               next_run = 0;
+
+       gc_work->last_bucket = i;
+       schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+       INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+       gc_work->exiting = false;
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(struct net *net,
                     const struct nf_conntrack_zone *zone,
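
[Standalone illustration of the adaptive rescheduling in gc_worker() above,
not kernel code: if most of the scanned entries turned out to be expired,
the worker reschedules itself immediately instead of sleeping.]

        static unsigned long gc_next_run(unsigned int scanned,
                                         unsigned int expired_count,
                                         unsigned long interval)
        {
                unsigned int ratio = scanned ? expired_count * 100 / scanned : 0;

                /* e.g. scanned=1024, expired=950 -> ratio=92 -> rerun now */
                return ratio >= 90 ? 0 : interval;
        }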
@@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net,
        /* save hash for reusing when confirming */
        *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
        ct->status = 0;
-       /* Don't set timer yet: wait for confirmation */
-       setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
        write_pnet(&ct->ct_net, net);
        memset(&ct->__nfct_init_offset[0], 0,
               offsetof(struct nf_conn, proto) -
@@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                          unsigned long extra_jiffies,
                          int do_acct)
 {
-       NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
        NF_CT_ASSERT(skb);
 
        /* Only update if this is not a fixed timeout */
@@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
                goto acct;
 
        /* If not in hash table, timer will not be active yet */
-       if (!nf_ct_is_confirmed(ct)) {
-               ct->timeout.expires = extra_jiffies;
-       } else {
-               unsigned long newtime = jiffies + extra_jiffies;
-
-               /* Only update the timeout if the new timeout is at least
-                  HZ jiffies from the old timeout. Need del_timer for race
-                  avoidance (may already be dying). */
-               if (newtime - ct->timeout.expires >= HZ)
-                       mod_timer_pending(&ct->timeout, newtime);
-       }
+       if (nf_ct_is_confirmed(ct))
+               extra_jiffies += nfct_time_stamp;
 
+       ct->timeout = extra_jiffies;
 acct:
        if (do_acct)
                nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
-bool __nf_ct_kill_acct(struct nf_conn *ct,
-                      enum ip_conntrack_info ctinfo,
-                      const struct sk_buff *skb,
-                      int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+                    enum ip_conntrack_info ctinfo,
+                    const struct sk_buff *skb)
 {
-       if (do_acct)
-               nf_ct_acct_update(ct, ctinfo, skb->len);
+       nf_ct_acct_update(ct, ctinfo, skb->len);
 
-       if (del_timer(&ct->timeout)) {
-               ct->timeout.function((unsigned long)ct);
-               return true;
-       }
-       return false;
+       return nf_ct_delete(ct, 0, 0);
 }
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 
@@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net,
 
        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
                /* Time to push up daisies... */
-               if (del_timer(&ct->timeout))
-                       nf_ct_delete(ct, portid, report);
-
-               /* ... else the timer will get him soon. */
 
+               nf_ct_delete(ct, portid, report);
                nf_ct_put(ct);
                cond_resched();
        }
@@ -1545,6 +1608,7 @@ static int untrack_refs(void)
 
 void nf_conntrack_cleanup_start(void)
 {
+       conntrack_gc_work.exiting = true;
        RCU_INIT_POINTER(ip_ct_attach, NULL);
 }
 
@@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void)
        while (untrack_refs() > 0)
                schedule();
 
+       cancel_delayed_work_sync(&conntrack_gc_work.dwork);
        nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 
        nf_conntrack_proto_fini();
@@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void)
        }
        /*  - and make it look like a confirmed connection */
        nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+       conntrack_gc_work_init(&conntrack_gc_work);
+       schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
        return 0;
 
 err_proto:
index d28011b..da9df2d 100644 (file)
@@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
 
        hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+               struct nf_conntrack_ecache *e;
 
-               if (nf_ct_is_dying(ct))
+               if (!nf_ct_is_confirmed(ct))
+                       continue;
+
+               e = nf_ct_ecache_find(ct);
+               if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
                        continue;
 
                if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
                        break;
                }
 
-               /* we've got the event delivered, now it's dying */
-               set_bit(IPS_DYING_BIT, &ct->status);
+               e->state = NFCT_ECACHE_DESTROY_SENT;
                refs[evicted] = ct;
 
                if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
        if (!e)
                goto out_unlock;
 
-       if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+       if (nf_ct_is_confirmed(ct)) {
                struct nf_ct_event item = {
                        .ct     = ct,
                        .portid = e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
                                 * triggered by a process, we store the PORTID
                                 * to include it in the retransmission.
                                 */
-                               if (eventmask & (1 << IPCT_DESTROY) &&
-                                   e->portid == 0 && portid != 0)
-                                       e->portid = portid;
-                               else
+                               if (eventmask & (1 << IPCT_DESTROY)) {
+                                       if (e->portid == 0 && portid != 0)
+                                               e->portid = portid;
+                                       e->state = NFCT_ECACHE_DESTROY_FAIL;
+                               } else {
                                        e->missed |= eventmask;
+                               }
                        } else {
                                e->missed &= ~missed;
                        }
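
[The destroy-event retry logic above relies on a small per-conntrack state
machine added to nf_conntrack_ecache.h in this series; for reference:]

        enum nf_ct_ecache_state {
                NFCT_ECACHE_UNKNOWN,            /* destroy event not sent yet */
                NFCT_ECACHE_DESTROY_FAIL,       /* tried but failed, retry from worker */
                NFCT_ECACHE_DESTROY_SENT,       /* sent after an earlier failure */
        };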
index 4314700..b6934b5 100644 (file)
@@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd,
        }
        delim = data[0];
        if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
-               pr_debug("try_eprt: invalid delimitter.\n");
+               pr_debug("try_eprt: invalid delimiter.\n");
                return 0;
        }
 
index fdfc71f..c052b71 100644 (file)
@@ -149,10 +149,7 @@ nla_put_failure:
 
 static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
 {
-       long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
-       if (timeout < 0)
-               timeout = 0;
+       long timeout = nf_ct_expires(ct) / HZ;
 
        if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
                goto nla_put_failure;
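
[nf_ct_expires() replaces the old timer arithmetic; as defined in this
series it clamps the remaining lifetime at zero:]

        static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
        {
                s32 timeout = ct->timeout - nfct_time_stamp;

                return timeout > 0 ? timeout : 0;
        }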
@@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
        struct hlist_nulls_node *n;
        struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
        u_int8_t l3proto = nfmsg->nfgen_family;
-       int res;
+       struct nf_conn *nf_ct_evict[8];
+       int res, i;
        spinlock_t *lockp;
 
        last = (struct nf_conn *)cb->args[1];
+       i = 0;
 
        local_bh_disable();
        for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
+               while (i) {
+                       i--;
+                       if (nf_ct_should_gc(nf_ct_evict[i]))
+                               nf_ct_kill(nf_ct_evict[i]);
+                       nf_ct_put(nf_ct_evict[i]);
+               }
+
                lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
                nf_conntrack_lock(lockp);
                if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@ restart:
                        if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
                                continue;
                        ct = nf_ct_tuplehash_to_ctrack(h);
+                       if (nf_ct_is_expired(ct)) {
+                               if (i < ARRAY_SIZE(nf_ct_evict) &&
+                                   atomic_inc_not_zero(&ct->ct_general.use))
+                                       nf_ct_evict[i++] = ct;
+                               continue;
+                       }
+
                        if (!net_eq(net, nf_ct_net(ct)))
                                continue;
 
@@ -878,6 +891,13 @@ out:
        if (last)
                nf_ct_put(last);
 
+       while (i) {
+               i--;
+               if (nf_ct_should_gc(nf_ct_evict[i]))
+                       nf_ct_kill(nf_ct_evict[i]);
+               nf_ct_put(nf_ct_evict[i]);
+       }
+
        return skb->len;
 }
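
[nf_ct_should_gc() gates the deferred kills above; per this series an entry
is only reaped if it is expired, confirmed and not already dying:]

        static inline bool nf_ct_should_gc(const struct nf_conn *ct)
        {
                return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
                       !nf_ct_is_dying(ct);
        }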
 
@@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
                }
        }
 
-       if (del_timer(&ct->timeout))
-               nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+       nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
        nf_ct_put(ct);
 
        return 0;
@@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
 {
        u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
-       if (!del_timer(&ct->timeout))
-               return -ETIME;
+       ct->timeout = nfct_time_stamp + timeout * HZ;
 
-       ct->timeout.expires = jiffies + timeout * HZ;
-       add_timer(&ct->timeout);
+       if (test_bit(IPS_DYING_BIT, &ct->status))
+               return -ETIME;
 
        return 0;
 }
@@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net,
 
        if (!cda[CTA_TIMEOUT])
                goto err1;
-       ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
-       ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+       ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
        rcu_read_lock();
        if (cda[CTA_HELP]) {
index 5588c7a..f60a475 100644 (file)
@@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
                pr_debug("setting timeout of conntrack %p to 0\n", sibling);
                sibling->proto.gre.timeout        = 0;
                sibling->proto.gre.stream_timeout = 0;
-               if (del_timer(&sibling->timeout))
-                       sibling->timeout.function((unsigned long)sibling);
+               nf_ct_kill(sibling);
                nf_ct_put(sibling);
                return 1;
        } else {
index b65d586..8d2c7d8 100644 (file)
@@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
               nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
-                                          struct nf_conntrack_l3proto *l3proto)
-{
-       if (l3proto->l3proto == PF_INET)
-               return &net->ct.nf_ct_proto;
-       else
-               return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
-                                        struct nf_conntrack_l3proto *l3proto)
-{
-       int err = 0;
-       struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-       /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
-       if (in == NULL)
-               return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       if (in->ctl_table != NULL) {
-               err = nf_ct_register_sysctl(net,
-                                           &in->ctl_table_header,
-                                           l3proto->ctl_table_path,
-                                           in->ctl_table);
-               if (err < 0) {
-                       kfree(in->ctl_table);
-                       in->ctl_table = NULL;
-               }
-       }
-#endif
-       return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
-                                           struct nf_conntrack_l3proto *l3proto)
-{
-       struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
-       if (in == NULL)
-               return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-       if (in->ctl_table_header != NULL)
-               nf_ct_unregister_sysctl(&in->ctl_table_header,
-                                       &in->ctl_table,
-                                       0);
-#endif
-}
-
 int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
        int ret = 0;
@@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
 int nf_ct_l3proto_pernet_register(struct net *net,
                                  struct nf_conntrack_l3proto *proto)
 {
-       int ret = 0;
+       int ret;
 
        if (proto->init_net) {
                ret = proto->init_net(net);
@@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net,
                        return ret;
        }
 
-       return nf_ct_l3proto_register_sysctl(net, proto);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
 
@@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
 void nf_ct_l3proto_pernet_unregister(struct net *net,
                                     struct nf_conntrack_l3proto *proto)
 {
-       nf_ct_l3proto_unregister_sysctl(net, proto);
-
        /* Remove all conntrack entries for this protocol */
        nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
 }
@@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
                        }
                }
        }
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
-               if (err < 0) {
-                       nf_ct_kfree_compat_sysctl_table(pn);
-                       goto out;
-               }
-               err = nf_ct_register_sysctl(net,
-                                           &pn->ctl_compat_header,
-                                           "net/ipv4/netfilter",
-                                           pn->ctl_compat_table);
-               if (err == 0)
-                       goto out;
-
-               nf_ct_kfree_compat_sysctl_table(pn);
-               nf_ct_unregister_sysctl(&pn->ctl_table_header,
-                                       &pn->ctl_table,
-                                       pn->users);
-       }
-out:
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
        return err;
 }
@@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
                nf_ct_unregister_sysctl(&pn->ctl_table_header,
                                        &pn->ctl_table,
                                        pn->users);
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
-               nf_ct_unregister_sysctl(&pn->ctl_compat_header,
-                                       &pn->ctl_compat_table,
-                                       0);
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 }
 
index 399a38f..a45bee5 100644 (file)
@@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 {
        struct dccp_hdr _hdr, *dh;
 
-       dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       /* Actually only need first 4 bytes to get ports. */
+       dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
        if (dh == NULL)
                return false;
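
[Why 4 bytes are enough: every transport header touched by these hunks
(DCCP, SCTP, TCP, UDP, UDP-Lite) begins with the two 16-bit port fields,
which is all that pkt_to_tuple() reads. Illustrative layout:]

        struct l4_ports {       /* first 4 bytes of the transport header */
                __be16 source;
                __be16 dest;
        };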
 
index 86dc752..d5868ba 100644 (file)
@@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = {
        },
        { }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table generic_compat_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_generic_timeout",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-                                              struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
-                                      sizeof(generic_compat_sysctl_table),
-                                      GFP_KERNEL);
-       if (!pn->ctl_compat_table)
-               return -ENOMEM;
-
-       pn->ctl_compat_table[0].data = &gn->timeout;
-#endif
-#endif
-       return 0;
-}
-
 static int generic_init_net(struct net *net, u_int16_t proto)
 {
-       int ret;
        struct nf_generic_net *gn = generic_pernet(net);
        struct nf_proto_net *pn = &gn->pn;
 
        gn->timeout = nf_ct_generic_timeout;
 
-       ret = generic_kmemdup_compat_sysctl_table(pn, gn);
-       if (ret < 0)
-               return ret;
-
-       ret = generic_kmemdup_sysctl_table(pn, gn);
-       if (ret < 0)
-               nf_ct_kfree_compat_sysctl_table(pn);
-
-       return ret;
+       return generic_kmemdup_sysctl_table(pn, gn);
 }
 
 static struct nf_proto_net *generic_get_net_proto(struct net *net)
index 1d7ab96..982ea62 100644 (file)
@@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
        const struct sctphdr *hp;
        struct sctphdr _hdr;
 
-       /* Actually only need first 8 bytes. */
-       hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+       /* Actually only need first 4 bytes to get ports. */
+       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
        if (hp == NULL)
                return false;
 
@@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = {
        },
        { }
 };
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table sctp_compat_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_sctp_timeout_closed",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_cookie_wait",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_cookie_echoed",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_established",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_shutdown_sent",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_shutdown_recd",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif
 
 static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-                                           struct sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
-                                      sizeof(sctp_compat_sysctl_table),
-                                      GFP_KERNEL);
-       if (!pn->ctl_compat_table)
-               return -ENOMEM;
-
-       pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
-       pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
-       pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
-       pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
-       pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
-       pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
-       pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-#endif
-#endif
-       return 0;
-}
-
 static int sctp_init_net(struct net *net, u_int16_t proto)
 {
-       int ret;
        struct sctp_net *sn = sctp_pernet(net);
        struct nf_proto_net *pn = &sn->pn;
 
@@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
                        sn->timeouts[i] = sctp_timeouts[i];
        }
 
-       if (proto == AF_INET) {
-               ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
-               if (ret < 0)
-                       return ret;
-
-               ret = sctp_kmemdup_sysctl_table(pn, sn);
-               if (ret < 0)
-                       nf_ct_kfree_compat_sysctl_table(pn);
-       } else
-               ret = sctp_kmemdup_sysctl_table(pn, sn);
-
-       return ret;
+       return sctp_kmemdup_sysctl_table(pn, sn);
 }
 
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
index 70c8381..69f6877 100644 (file)
@@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
        const struct tcphdr *hp;
        struct tcphdr _hdr;
 
-       /* Actually only need first 8 bytes. */
-       hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+       /* Actually only need first 4 bytes to get ports. */
+       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
        if (hp == NULL)
                return false;
 
@@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = {
        },
        { }
 };
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table tcp_compat_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_tcp_timeout_syn_sent",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_syn_recv",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_established",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_fin_wait",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_close_wait",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_last_ack",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_time_wait",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_close",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_timeout_max_retrans",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_loose",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_be_liberal",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "ip_conntrack_tcp_max_retrans",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-                                          struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
-                                      sizeof(tcp_compat_sysctl_table),
-                                      GFP_KERNEL);
-       if (!pn->ctl_compat_table)
-               return -ENOMEM;
-
-       pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
-       pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
-       pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
-       pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
-       pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
-       pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
-       pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
-       pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
-       pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
-       pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
-       pn->ctl_compat_table[10].data = &tn->tcp_loose;
-       pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
-       pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
-#endif
-#endif
-       return 0;
-}
-
 static int tcp_init_net(struct net *net, u_int16_t proto)
 {
-       int ret;
        struct nf_tcp_net *tn = tcp_pernet(net);
        struct nf_proto_net *pn = &tn->pn;
 
@@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto)
                tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
        }
 
-       if (proto == AF_INET) {
-               ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
-               if (ret < 0)
-                       return ret;
-
-               ret = tcp_kmemdup_sysctl_table(pn, tn);
-               if (ret < 0)
-                       nf_ct_kfree_compat_sysctl_table(pn);
-       } else
-               ret = tcp_kmemdup_sysctl_table(pn, tn);
-
-       return ret;
+       return tcp_kmemdup_sysctl_table(pn, tn);
 }
 
 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
index 4fd0405..20f35ed 100644 (file)
@@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb,
        const struct udphdr *hp;
        struct udphdr _hdr;
 
-       /* Actually only need first 8 bytes. */
-       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       /* Actually only need first 4 bytes to get ports. */
+       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
        if (hp == NULL)
                return false;
 
@@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = {
        },
        { }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table udp_compat_sysctl_table[] = {
-       {
-               .procname       = "ip_conntrack_udp_timeout",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       {
-               .procname       = "ip_conntrack_udp_timeout_stream",
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-       },
-       { }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
        return 0;
 }
 
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-                                          struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-       pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
-                                      sizeof(udp_compat_sysctl_table),
-                                      GFP_KERNEL);
-       if (!pn->ctl_compat_table)
-               return -ENOMEM;
-
-       pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
-       pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-#endif
-       return 0;
-}
-
 static int udp_init_net(struct net *net, u_int16_t proto)
 {
-       int ret;
        struct nf_udp_net *un = udp_pernet(net);
        struct nf_proto_net *pn = &un->pn;
 
@@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto)
                        un->timeouts[i] = udp_timeouts[i];
        }
 
-       if (proto == AF_INET) {
-               ret = udp_kmemdup_compat_sysctl_table(pn, un);
-               if (ret < 0)
-                       return ret;
-
-               ret = udp_kmemdup_sysctl_table(pn, un);
-               if (ret < 0)
-                       nf_ct_kfree_compat_sysctl_table(pn);
-       } else
-               ret = udp_kmemdup_sysctl_table(pn, un);
-
-       return ret;
+       return udp_kmemdup_sysctl_table(pn, un);
 }
 
 static struct nf_proto_net *udp_get_net_proto(struct net *net)
index 9d692f5..029206e 100644 (file)
@@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
        const struct udphdr *hp;
        struct udphdr _hdr;
 
-       hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+       /* Actually only need first 4 bytes to get ports. */
+       hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
        if (hp == NULL)
                return false;
 
index 958a145..3d9a316 100644 (file)
@@ -205,6 +205,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
        const struct nf_conntrack_l3proto *l3proto;
        const struct nf_conntrack_l4proto *l4proto;
+       struct net *net = seq_file_net(s);
        int ret = 0;
 
        NF_CT_ASSERT(ct);
@@ -215,6 +216,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (NF_CT_DIRECTION(hash))
                goto release;
 
+       if (!net_eq(nf_ct_net(ct), net))
+               goto release;
+
        l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
        NF_CT_ASSERT(l3proto);
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -224,8 +228,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
        seq_printf(s, "%-8s %u %-8s %u %ld ",
                   l3proto->name, nf_ct_l3num(ct),
                   l4proto->name, nf_ct_protonum(ct),
-                  timer_pending(&ct->timeout)
-                  ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+                  nf_ct_expires(ct) / HZ);
 
        if (l4proto->print_conntrack)
                l4proto->print_conntrack(s, ct);
index aa5847a..30a17d6 100644 (file)
@@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
        return NULL;
 }
 
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
 {
        const struct nf_logger *log;
 
-       if (pf == NFPROTO_UNSPEC)
-               return;
+       if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+               return -EOPNOTSUPP;
 
        mutex_lock(&nf_log_mutex);
        log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
                rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
 
        mutex_unlock(&nf_log_mutex);
+
+       return 0;
 }
 EXPORT_SYMBOL(nf_log_set);
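
[Callers of nf_log_set() can now propagate failure; a hypothetical pernet
init caller (the logger name is invented for illustration):]

        static int __net_init ipv4_log_net_init(struct net *net)
        {
                return nf_log_set(net, NFPROTO_IPV4, &ipv4_example_logger);
        }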
 
index de31818..81ae41f 100644 (file)
@@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
         * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
         * will delete the entry from the already-freed table.
         */
-       if (!del_timer(&ct->timeout))
-               return 1;
-
        ct->status &= ~IPS_NAT_DONE_MASK;
-
        rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
                               nf_nat_bysource_params);
 
-       add_timer(&ct->timeout);
-
        /* don't delete conntrack.  Although that would make things a lot
         * simpler, we'd end up flushing all conntracks on nat rmmod.
         */
index 7e1c876..bd9715e 100644 (file)
@@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
        }
 }
 
+struct nft_chain_hook {
+       u32                             num;
+       u32                             priority;
+       const struct nf_chain_type      *type;
+       struct net_device               *dev;
+};
+
+static int nft_chain_parse_hook(struct net *net,
+                               const struct nlattr * const nla[],
+                               struct nft_af_info *afi,
+                               struct nft_chain_hook *hook, bool create)
+{
+       struct nlattr *ha[NFTA_HOOK_MAX + 1];
+       const struct nf_chain_type *type;
+       struct net_device *dev;
+       int err;
+
+       err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+                              nft_hook_policy);
+       if (err < 0)
+               return err;
+
+       if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+           ha[NFTA_HOOK_PRIORITY] == NULL)
+               return -EINVAL;
+
+       hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+       if (hook->num >= afi->nhooks)
+               return -EINVAL;
+
+       hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+       type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+       if (nla[NFTA_CHAIN_TYPE]) {
+               type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+                                                  create);
+               if (IS_ERR(type))
+                       return PTR_ERR(type);
+       }
+       if (!(type->hook_mask & (1 << hook->num)))
+               return -EOPNOTSUPP;
+       if (!try_module_get(type->owner))
+               return -ENOENT;
+
+       hook->type = type;
+
+       hook->dev = NULL;
+       if (afi->flags & NFT_AF_NEEDS_DEV) {
+               char ifname[IFNAMSIZ];
+
+               if (!ha[NFTA_HOOK_DEV]) {
+                       module_put(type->owner);
+                       return -EOPNOTSUPP;
+               }
+
+               nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+               dev = dev_get_by_name(net, ifname);
+               if (!dev) {
+                       module_put(type->owner);
+                       return -ENOENT;
+               }
+               hook->dev = dev;
+       } else if (ha[NFTA_HOOK_DEV]) {
+               module_put(type->owner);
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+       module_put(hook->type->owner);
+       if (hook->dev != NULL)
+               dev_put(hook->dev);
+}
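
nft_chain_parse_hook() and nft_chain_release_hook() form an acquire/release pair: a successful parse holds a reference on the chain type's module and, for NFT_AF_NEEDS_DEV families, on the hook device, and every exit path taken after a successful parse must drop both. A condensed sketch of the calling convention the hunks below rely on:

	struct nft_chain_hook hook;
	int err;

	err = nft_chain_parse_hook(net, nla, afi, &hook, create);
	if (err < 0)
		return err;

	/* ... use hook.num, hook.priority, hook.type, hook.dev ... */

	nft_chain_release_hook(&hook);	/* drops module and device refs */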
+
 static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
                              const struct nlattr * const nla[])
@@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
        struct nft_table *table;
        struct nft_chain *chain;
        struct nft_base_chain *basechain = NULL;
-       struct nlattr *ha[NFTA_HOOK_MAX + 1];
        u8 genmask = nft_genmask_next(net);
        int family = nfmsg->nfgen_family;
-       struct net_device *dev = NULL;
        u8 policy = NF_ACCEPT;
        u64 handle = 0;
        unsigned int i;
@@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                if (nlh->nlmsg_flags & NLM_F_REPLACE)
                        return -EOPNOTSUPP;
 
+               if (nla[NFTA_CHAIN_HOOK]) {
+                       struct nft_base_chain *basechain;
+                       struct nft_chain_hook hook;
+                       struct nf_hook_ops *ops;
+
+                       if (!(chain->flags & NFT_BASE_CHAIN))
+                               return -EBUSY;
+
+                       err = nft_chain_parse_hook(net, nla, afi, &hook,
+                                                  create);
+                       if (err < 0)
+                               return err;
+
+                       basechain = nft_base_chain(chain);
+                       if (basechain->type != hook.type) {
+                               nft_chain_release_hook(&hook);
+                               return -EBUSY;
+                       }
+
+                       for (i = 0; i < afi->nops; i++) {
+                               ops = &basechain->ops[i];
+                               if (ops->hooknum != hook.num ||
+                                   ops->priority != hook.priority ||
+                                   ops->dev != hook.dev) {
+                                       nft_chain_release_hook(&hook);
+                                       return -EBUSY;
+                               }
+                       }
+                       nft_chain_release_hook(&hook);
+               }
+
                if (nla[NFTA_CHAIN_HANDLE] && name) {
                        struct nft_chain *chain2;
 
@@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                return -EOVERFLOW;
 
        if (nla[NFTA_CHAIN_HOOK]) {
-               const struct nf_chain_type *type;
+               struct nft_chain_hook hook;
                struct nf_hook_ops *ops;
                nf_hookfn *hookfn;
-               u32 hooknum, priority;
-
-               type = chain_type[family][NFT_CHAIN_T_DEFAULT];
-               if (nla[NFTA_CHAIN_TYPE]) {
-                       type = nf_tables_chain_type_lookup(afi,
-                                                          nla[NFTA_CHAIN_TYPE],
-                                                          create);
-                       if (IS_ERR(type))
-                               return PTR_ERR(type);
-               }
 
-               err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
-                                      nft_hook_policy);
+               err = nft_chain_parse_hook(net, nla, afi, &hook, create);
                if (err < 0)
                        return err;
-               if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
-                   ha[NFTA_HOOK_PRIORITY] == NULL)
-                       return -EINVAL;
-
-               hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
-               if (hooknum >= afi->nhooks)
-                       return -EINVAL;
-               priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
-
-               if (!(type->hook_mask & (1 << hooknum)))
-                       return -EOPNOTSUPP;
-               if (!try_module_get(type->owner))
-                       return -ENOENT;
-               hookfn = type->hooks[hooknum];
-
-               if (afi->flags & NFT_AF_NEEDS_DEV) {
-                       char ifname[IFNAMSIZ];
-
-                       if (!ha[NFTA_HOOK_DEV]) {
-                               module_put(type->owner);
-                               return -EOPNOTSUPP;
-                       }
-
-                       nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
-                       dev = dev_get_by_name(net, ifname);
-                       if (!dev) {
-                               module_put(type->owner);
-                               return -ENOENT;
-                       }
-               } else if (ha[NFTA_HOOK_DEV]) {
-                       module_put(type->owner);
-                       return -EOPNOTSUPP;
-               }
 
                basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
                if (basechain == NULL) {
-                       module_put(type->owner);
-                       if (dev != NULL)
-                               dev_put(dev);
+                       nft_chain_release_hook(&hook);
                        return -ENOMEM;
                }
 
-               if (dev != NULL)
-                       strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+               if (hook.dev != NULL)
+                       strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
 
                if (nla[NFTA_CHAIN_COUNTERS]) {
                        stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
                        if (IS_ERR(stats)) {
-                               module_put(type->owner);
+                               nft_chain_release_hook(&hook);
                                kfree(basechain);
-                               if (dev != NULL)
-                                       dev_put(dev);
                                return PTR_ERR(stats);
                        }
                        basechain->stats = stats;
                } else {
                        stats = netdev_alloc_pcpu_stats(struct nft_stats);
                        if (stats == NULL) {
-                               module_put(type->owner);
+                               nft_chain_release_hook(&hook);
                                kfree(basechain);
-                               if (dev != NULL)
-                                       dev_put(dev);
                                return -ENOMEM;
                        }
                        rcu_assign_pointer(basechain->stats, stats);
                }
 
-               basechain->type = type;
+               hookfn = hook.type->hooks[hook.num];
+               basechain->type = hook.type;
                chain = &basechain->chain;
 
                for (i = 0; i < afi->nops; i++) {
                        ops = &basechain->ops[i];
                        ops->pf         = family;
-                       ops->hooknum    = hooknum;
-                       ops->priority   = priority;
+                       ops->hooknum    = hook.num;
+                       ops->priority   = hook.priority;
                        ops->priv       = chain;
                        ops->hook       = afi->hooks[ops->hooknum];
-                       ops->dev        = dev;
+                       ops->dev        = hook.dev;
                        if (hookfn)
                                ops->hook = hookfn;
                        if (afi->hook_ops_init)
@@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
 }
 
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
-                           const struct nlattr *attr)
+                           const struct nlattr *attr, u32 nlmsg_flags)
 {
        struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
        struct nft_data_desc d1, d2;
        struct nft_set_ext_tmpl tmpl;
-       struct nft_set_ext *ext;
+       struct nft_set_ext *ext, *ext2;
        struct nft_set_elem elem;
        struct nft_set_binding *binding;
        struct nft_userdata *udata;
@@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err4;
 
        ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
-       err = set->ops->insert(ctx->net, set, &elem);
-       if (err < 0)
+       err = set->ops->insert(ctx->net, set, &elem, &ext2);
+       if (err) {
+               if (err == -EEXIST) {
+                       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+                           nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+                           memcmp(nft_set_ext_data(ext),
+                                  nft_set_ext_data(ext2), set->dlen) != 0)
+                               err = -EBUSY;
+                       else if (!(nlmsg_flags & NLM_F_EXCL))
+                               err = 0;
+               }
                goto err5;
+       }
 
        nft_trans_elem(trans) = elem;
        list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
                    !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
                        return -ENFILE;
 
-               err = nft_add_set_elem(&ctx, set, attr);
+               err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
                if (err < 0) {
                        atomic_dec(&set->nelems);
                        break;
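
Passing nlh->nlmsg_flags down to nft_add_set_elem() changes duplicate handling: re-adding an identical element now succeeds unless the caller explicitly asked for exclusive creation, while a key already bound to different data still fails. The resulting semantics, summarised as a comment:

	/* NFT_MSG_NEWSETELEM insert outcomes (sketch):
	 *
	 *   element absent           -> inserted, returns 0
	 *   same key, same data      -> 0, or -EEXIST if NLM_F_EXCL is set
	 *   same key, different data -> -EBUSY
	 */
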
index 5eefe4a..75d696f 100644 (file)
@@ -30,7 +30,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
        if (!iph)
                return;
 
-       iph = ip_hdr(skb);
        if (iph->ihl < 5 || iph->version != 4)
                return;
 
index 1b4de4b..d44d89b 100644 (file)
@@ -326,14 +326,14 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
 {
        int ret = 0;
 
-       /* we want to avoid races with nfnl_acct_find_get. */
-       if (atomic_dec_and_test(&cur->refcnt)) {
+       /* We want to avoid races with nfnl_acct_put(). Only drop the
+        * refcnt to 0 when it is currently exactly 1.
+        */
+       if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&cur->head);
                kfree_rcu(cur, rcu_head);
        } else {
-               /* still in use, restore reference counter. */
-               atomic_inc(&cur->refcnt);
                ret = -EBUSY;
        }
        return ret;
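
The old dec-then-re-inc sequence is racy against concurrent reference drops: between the decrement and the compensating increment the object can be observed with a refcount of zero and acted on by another path. atomic_cmpxchg() closes that window by only ever dropping 1 to 0, so deletion succeeds solely when the caller holds the last reference. The generic shape of the pattern, as a standalone sketch:

	/* Returns true iff we were the sole owner and may free the object. */
	static bool try_release_last_ref(atomic_t *refcnt)
	{
		return atomic_cmpxchg(refcnt, 1, 0) == 1;
	}

The switch to list_for_each_entry_safe() in the flush path below pairs with this: a successful try-del unlinks the entry and schedules it for freeing, so the walk must cache its successor pointer before each iteration body runs.
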
@@ -343,12 +343,12 @@ static int nfnl_acct_del(struct net *net, struct sock *nfnl,
                         struct sk_buff *skb, const struct nlmsghdr *nlh,
                         const struct nlattr * const tb[])
 {
-       char *acct_name;
-       struct nf_acct *cur;
+       struct nf_acct *cur, *tmp;
        int ret = -ENOENT;
+       char *acct_name;
 
        if (!tb[NFACCT_NAME]) {
-               list_for_each_entry(cur, &net->nfnl_acct_list, head)
+               list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head)
                        nfnl_acct_try_del(cur);
 
                return 0;
@@ -443,7 +443,7 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
-static void nfnl_overquota_report(struct nf_acct *nfacct)
+static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 {
        int ret;
        struct sk_buff *skb;
@@ -458,11 +458,12 @@ static void nfnl_overquota_report(struct nf_acct *nfacct)
                kfree_skb(skb);
                return;
        }
-       netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+       netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
                          GFP_ATOMIC);
 }
 
-int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+                       struct nf_acct *nfacct)
 {
        u64 now;
        u64 *quota;
@@ -480,7 +481,7 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
 
        if (now >= *quota &&
            !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
-               nfnl_overquota_report(nfacct);
+               nfnl_overquota_report(net, nfacct);
        }
 
        return ret;
index 4cdcd96..139e086 100644 (file)
@@ -98,31 +98,28 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
                break;
        }
 
-       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
-
-       /* This protocol is not supportted, skip. */
-       if (l4proto->l4proto != l4num) {
-               ret = -EOPNOTSUPP;
-               goto err_proto_put;
-       }
-
        if (matching) {
                if (nlh->nlmsg_flags & NLM_F_REPLACE) {
                        /* You cannot replace one timeout policy by another of
                         * different kind, sorry.
                         */
                        if (matching->l3num != l3num ||
-                           matching->l4proto->l4proto != l4num) {
-                               ret = -EINVAL;
-                               goto err_proto_put;
-                       }
-
-                       ret = ctnl_timeout_parse_policy(&matching->data,
-                                                       l4proto, net,
-                                                       cda[CTA_TIMEOUT_DATA]);
-                       return ret;
+                           matching->l4proto->l4proto != l4num)
+                               return -EINVAL;
+
+                       return ctnl_timeout_parse_policy(&matching->data,
+                                                        matching->l4proto, net,
+                                                        cda[CTA_TIMEOUT_DATA]);
                }
-               ret = -EBUSY;
+
+               return -EBUSY;
+       }
+
+       l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+       /* This protocol is not supported, skip. */
+       if (l4proto->l4proto != l4num) {
+               ret = -EOPNOTSUPP;
                goto err_proto_put;
        }
 
@@ -305,7 +302,16 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
        const struct hlist_nulls_node *nn;
        unsigned int last_hsize;
        spinlock_t *lock;
-       int i;
+       int i, cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+               spin_lock_bh(&pcpu->lock);
+               hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+                       untimeout(h, timeout);
+               spin_unlock_bh(&pcpu->lock);
+       }
 
        local_bh_disable();
 restart:
@@ -330,16 +336,16 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 {
        int ret = 0;
 
-       /* we want to avoid races with nf_ct_timeout_find_get. */
-       if (atomic_dec_and_test(&timeout->refcnt)) {
+       /* We want to avoid races with ctnl_timeout_put(). Only drop the
+        * refcnt to 0 when it is currently exactly 1.
+        */
+       if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
                /* We are protected by nfnl mutex. */
                list_del_rcu(&timeout->head);
                nf_ct_l4proto_put(timeout->l4proto);
                ctnl_untimeout(net, timeout);
                kfree_rcu(timeout, rcu_head);
        } else {
-               /* still in use, restore reference counter. */
-               atomic_inc(&timeout->refcnt);
                ret = -EBUSY;
        }
        return ret;
@@ -350,12 +356,13 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
                                 const struct nlmsghdr *nlh,
                                 const struct nlattr * const cda[])
 {
-       struct ctnl_timeout *cur;
+       struct ctnl_timeout *cur, *tmp;
        int ret = -ENOENT;
        char *name;
 
        if (!cda[CTA_TIMEOUT_NAME]) {
-               list_for_each_entry(cur, &net->nfct_timeout_list, head)
+               list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
+                                        head)
                        ctnl_timeout_try_del(net, cur);
 
                return 0;
@@ -543,7 +550,9 @@ err:
 
 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
 {
-       atomic_dec(&timeout->refcnt);
+       if (atomic_dec_and_test(&timeout->refcnt))
+               kfree_rcu(timeout, rcu_head);
+
        module_put(THIS_MODULE);
 }
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
@@ -591,7 +600,9 @@ static void __net_exit cttimeout_net_exit(struct net *net)
        list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
                list_del_rcu(&cur->head);
                nf_ct_l4proto_put(cur->l4proto);
-               kfree_rcu(cur, rcu_head);
+
+               if (atomic_dec_and_test(&cur->refcnt))
+                       kfree_rcu(cur, rcu_head);
        }
 }
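
Taken together, the cttimeout hunks give timeout objects conventional refcount ownership: the per-netns list holds one reference, ctnl_timeout_put() frees the object on the final put, and net exit now drops the list's reference instead of freeing unconditionally, so an object still held elsewhere survives until its last user releases it. The release side reduces to the usual idiom (condensed sketch, omitting the module reference):

	static void timeout_release(struct ctnl_timeout *t)
	{
		if (atomic_dec_and_test(&t->refcnt))
			kfree_rcu(t, rcu_head);	/* last reference gone */
	}

The new for_each_possible_cpu() walk in ctnl_untimeout() additionally covers conntracks still sitting on the per-cpu unconfirmed lists, which the hash-table walk alone would miss.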
 
index cbcfdfb..6577db5 100644 (file)
@@ -1147,6 +1147,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
 MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
 MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
+MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
 
 module_init(nfnetlink_log_init);
 module_exit(nfnetlink_log_fini);
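
The numeric alias covers NFPROTO_ARP, for which no AF_* constant exists to feed the macro. MODULE_ALIAS_NF_LOGGER() builds the "nf-logger-<family>-<type>" module alias that the logging core requests on demand, so the added line expands to roughly:

	MODULE_ALIAS("nf-logger-3-1");	/* family NFPROTO_ARP (3), logger type 1 */

which lets request_module() autoload nfnetlink_log the first time ARP packet logging is asked for.
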
index 564fa79..764251d 100644 (file)
 /*
- * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/jhash.h>
 #include <linux/netlink.h>
-#include <linux/workqueue.h>
-#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
-
-/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
 
 struct nft_hash {
-       struct rhashtable               ht;
-       struct delayed_work             gc_work;
-};
-
-struct nft_hash_elem {
-       struct rhash_head               node;
-       struct nft_set_ext              ext;
+       enum nft_registers      sreg:8;
+       enum nft_registers      dreg:8;
+       u8                      len;
+       u32                     modulus;
+       u32                     seed;
 };
 
-struct nft_hash_cmp_arg {
-       const struct nft_set            *set;
-       const u32                       *key;
-       u8                              genmask;
-};
-
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
-{
-       const struct nft_hash_cmp_arg *arg = data;
-
-       return jhash(arg->key, len, seed);
-}
-
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
-{
-       const struct nft_hash_elem *he = data;
-
-       return jhash(nft_set_ext_key(&he->ext), len, seed);
-}
-
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
-                              const void *ptr)
-{
-       const struct nft_hash_cmp_arg *x = arg->key;
-       const struct nft_hash_elem *he = ptr;
-
-       if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
-               return 1;
-       if (nft_set_elem_expired(&he->ext))
-               return 1;
-       if (!nft_set_elem_active(&he->ext, x->genmask))
-               return 1;
-       return 0;
-}
-
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
-                           const u32 *key, const struct nft_set_ext **ext)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       const struct nft_hash_elem *he;
-       struct nft_hash_cmp_arg arg = {
-               .genmask = nft_genmask_cur(net),
-               .set     = set,
-               .key     = key,
-       };
-
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-       if (he != NULL)
-               *ext = &he->ext;
-
-       return !!he;
-}
-
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
-                           void *(*new)(struct nft_set *,
-                                        const struct nft_expr *,
-                                        struct nft_regs *regs),
-                           const struct nft_expr *expr,
-                           struct nft_regs *regs,
-                           const struct nft_set_ext **ext)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
-       struct nft_hash_cmp_arg arg = {
-               .genmask = NFT_GENMASK_ANY,
-               .set     = set,
-               .key     = key,
-       };
-
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-       if (he != NULL)
-               goto out;
-
-       he = new(set, expr, regs);
-       if (he == NULL)
-               goto err1;
-       if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-                                        nft_hash_params))
-               goto err2;
-out:
-       *ext = &he->ext;
-       return true;
-
-err2:
-       nft_set_elem_destroy(set, he);
-err1:
-       return false;
-}
-
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
-                          const struct nft_set_elem *elem)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he = elem->priv;
-       struct nft_hash_cmp_arg arg = {
-               .genmask = nft_genmask_next(net),
-               .set     = set,
-               .key     = elem->key.val.data,
-       };
-
-       return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-                                           nft_hash_params);
-}
-
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
-                             const struct nft_set_elem *elem)
+static void nft_hash_eval(const struct nft_expr *expr,
+                         struct nft_regs *regs,
+                         const struct nft_pktinfo *pkt)
 {
-       struct nft_hash_elem *he = elem->priv;
+       struct nft_hash *priv = nft_expr_priv(expr);
+       const void *data = &regs->data[priv->sreg];
 
-       nft_set_elem_change_active(net, set, &he->ext);
-       nft_set_elem_clear_busy(&he->ext);
+       regs->data[priv->dreg] =
+               reciprocal_scale(jhash(data, priv->len, priv->seed),
+                                priv->modulus);
 }
 
-static void *nft_hash_deactivate(const struct net *net,
-                                const struct nft_set *set,
-                                const struct nft_set_elem *elem)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
-       struct nft_hash_cmp_arg arg = {
-               .genmask = nft_genmask_next(net),
-               .set     = set,
-               .key     = elem->key.val.data,
-       };
-
-       rcu_read_lock();
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-       if (he != NULL) {
-               if (!nft_set_elem_mark_busy(&he->ext) ||
-                   !nft_is_active(net, &he->ext))
-                       nft_set_elem_change_active(net, set, &he->ext);
-               else
-                       he = NULL;
-       }
-       rcu_read_unlock();
-
-       return he;
-}
-
-static void nft_hash_remove(const struct nft_set *set,
-                           const struct nft_set_elem *elem)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he = elem->priv;
-
-       rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-}
-
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
-                         struct nft_set_iter *iter)
-{
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
-       struct rhashtable_iter hti;
-       struct nft_set_elem elem;
-       int err;
-
-       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-       iter->err = err;
-       if (err)
-               return;
-
-       err = rhashtable_walk_start(&hti);
-       if (err && err != -EAGAIN) {
-               iter->err = err;
-               goto out;
-       }
-
-       while ((he = rhashtable_walk_next(&hti))) {
-               if (IS_ERR(he)) {
-                       err = PTR_ERR(he);
-                       if (err != -EAGAIN) {
-                               iter->err = err;
-                               goto out;
-                       }
-
-                       continue;
-               }
-
-               if (iter->count < iter->skip)
-                       goto cont;
-               if (nft_set_elem_expired(&he->ext))
-                       goto cont;
-               if (!nft_set_elem_active(&he->ext, iter->genmask))
-                       goto cont;
-
-               elem.priv = he;
-
-               iter->err = iter->fn(ctx, set, iter, &elem);
-               if (iter->err < 0)
-                       goto out;
-
-cont:
-               iter->count++;
-       }
-
-out:
-       rhashtable_walk_stop(&hti);
-       rhashtable_walk_exit(&hti);
-}
-
-static void nft_hash_gc(struct work_struct *work)
-{
-       struct nft_set *set;
-       struct nft_hash_elem *he;
-       struct nft_hash *priv;
-       struct nft_set_gc_batch *gcb = NULL;
-       struct rhashtable_iter hti;
-       int err;
-
-       priv = container_of(work, struct nft_hash, gc_work.work);
-       set  = nft_set_container_of(priv);
-
-       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-       if (err)
-               goto schedule;
-
-       err = rhashtable_walk_start(&hti);
-       if (err && err != -EAGAIN)
-               goto out;
-
-       while ((he = rhashtable_walk_next(&hti))) {
-               if (IS_ERR(he)) {
-                       if (PTR_ERR(he) != -EAGAIN)
-                               goto out;
-                       continue;
-               }
-
-               if (!nft_set_elem_expired(&he->ext))
-                       continue;
-               if (nft_set_elem_mark_busy(&he->ext))
-                       continue;
-
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb == NULL)
-                       goto out;
-               rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, he);
-       }
-out:
-       rhashtable_walk_stop(&hti);
-       rhashtable_walk_exit(&hti);
-
-       nft_set_gc_batch_complete(gcb);
-schedule:
-       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-                          nft_set_gc_interval(set));
-}
-
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
-{
-       return sizeof(struct nft_hash);
-}
-
-static const struct rhashtable_params nft_hash_params = {
-       .head_offset            = offsetof(struct nft_hash_elem, node),
-       .hashfn                 = nft_hash_key,
-       .obj_hashfn             = nft_hash_obj,
-       .obj_cmpfn              = nft_hash_cmp,
-       .automatic_shrinking    = true,
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+       [NFTA_HASH_SREG]        = { .type = NLA_U32 },
+       [NFTA_HASH_DREG]        = { .type = NLA_U32 },
+       [NFTA_HASH_LEN]         = { .type = NLA_U32 },
+       [NFTA_HASH_MODULUS]     = { .type = NLA_U32 },
+       [NFTA_HASH_SEED]        = { .type = NLA_U32 },
 };
 
-static int nft_hash_init(const struct nft_set *set,
-                        const struct nft_set_desc *desc,
+static int nft_hash_init(const struct nft_ctx *ctx,
+                        const struct nft_expr *expr,
                         const struct nlattr * const tb[])
 {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct rhashtable_params params = nft_hash_params;
-       int err;
+       struct nft_hash *priv = nft_expr_priv(expr);
+       u32 len;
+       int err;
 
-       params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
-       params.key_len    = set->klen;
+       if (!tb[NFTA_HASH_SREG] ||
+           !tb[NFTA_HASH_DREG] ||
+           !tb[NFTA_HASH_LEN]  ||
+           !tb[NFTA_HASH_SEED] ||
+           !tb[NFTA_HASH_MODULUS])
+               return -EINVAL;
 
-       err = rhashtable_init(&priv->ht, &params);
-       if (err < 0)
-               return err;
+       priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+       priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
 
-       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
-       if (set->flags & NFT_SET_TIMEOUT)
-               queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-                                  nft_set_gc_interval(set));
-       return 0;
-}
+       len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+       if (len == 0 || len > U8_MAX)
+               return -ERANGE;
 
-static void nft_hash_elem_destroy(void *ptr, void *arg)
-{
-       nft_set_elem_destroy((const struct nft_set *)arg, ptr);
-}
+       priv->len = len;
 
-static void nft_hash_destroy(const struct nft_set *set)
-{
-       struct nft_hash *priv = nft_set_priv(set);
+       priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+       if (priv->modulus <= 1)
+               return -ERANGE;
 
-       cancel_delayed_work_sync(&priv->gc_work);
-       rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
-                                   (void *)set);
+       priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+       err = nft_validate_register_load(priv->sreg, len);
+       if (err < 0)
+               return err;
+
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, sizeof(u32));
 }
 
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
-                             struct nft_set_estimate *est)
+static int nft_hash_dump(struct sk_buff *skb,
+                        const struct nft_expr *expr)
 {
-       unsigned int esize;
+       const struct nft_hash *priv = nft_expr_priv(expr);
 
-       esize = sizeof(struct nft_hash_elem);
-       if (desc->size) {
-               est->size = sizeof(struct nft_hash) +
-                           roundup_pow_of_two(desc->size * 4 / 3) *
-                           sizeof(struct nft_hash_elem *) +
-                           desc->size * esize;
-       } else {
-               /* Resizing happens when the load drops below 30% or goes
-                * above 75%. The average of 52.5% load (approximated by 50%)
-                * is used for the size estimation of the hash buckets,
-                * meaning we calculate two buckets per element.
-                */
-               est->size = esize + 2 * sizeof(struct nft_hash_elem *);
-       }
+       if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+               goto nla_put_failure;
+       if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len)))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+               goto nla_put_failure;
 
-       est->class = NFT_SET_CLASS_O_1;
+       return 0;
 
-       return true;
+nla_put_failure:
+       return -1;
 }
 
-static struct nft_set_ops nft_hash_ops __read_mostly = {
-       .privsize       = nft_hash_privsize,
-       .elemsize       = offsetof(struct nft_hash_elem, ext),
-       .estimate       = nft_hash_estimate,
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_hash_ops = {
+       .type           = &nft_hash_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+       .eval           = nft_hash_eval,
        .init           = nft_hash_init,
-       .destroy        = nft_hash_destroy,
-       .insert         = nft_hash_insert,
-       .activate       = nft_hash_activate,
-       .deactivate     = nft_hash_deactivate,
-       .remove         = nft_hash_remove,
-       .lookup         = nft_hash_lookup,
-       .update         = nft_hash_update,
-       .walk           = nft_hash_walk,
-       .features       = NFT_SET_MAP | NFT_SET_TIMEOUT,
+       .dump           = nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+       .name           = "hash",
+       .ops            = &nft_hash_ops,
+       .policy         = nft_hash_policy,
+       .maxattr        = NFTA_HASH_MAX,
        .owner          = THIS_MODULE,
 };
 
 static int __init nft_hash_module_init(void)
 {
-       return nft_register_set(&nft_hash_ops);
+       return nft_register_expr(&nft_hash_type);
 }
 
 static void __exit nft_hash_module_exit(void)
 {
-       nft_unregister_set(&nft_hash_ops);
+       nft_unregister_expr(&nft_hash_type);
 }
 
 module_init(nft_hash_module_init);
 module_exit(nft_hash_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
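
The rewritten file turns "hash" from a set backend into an expression: it takes priv->len bytes from the source register, hashes them with jhash() and the configured seed, and scales the result into [0, modulus) for the destination register. reciprocal_scale() does the scaling with a multiply and shift instead of a division; its kernel definition is essentially:

	static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
	{
		/* Maps a 32-bit value uniformly onto [0, ep_ro). */
		return (u32)(((u64)val * ep_ro) >> 32);
	}

With modulus = 4, for instance, the 32-bit hash space is split into four equal buckets, the typical building block for hash-based load balancing via a verdict map.
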
index 2863f34..8a6bc76 100644 (file)
@@ -291,10 +291,16 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 }
 EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
-static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+                         const struct nft_expr *expr,
+                         const struct nft_data **data)
 {
+       struct nft_meta *priv = nft_expr_priv(expr);
        unsigned int hooks;
 
+       if (priv->key != NFT_META_PKTTYPE)
+               return 0;
+
        switch (ctx->afi->family) {
        case NFPROTO_BRIDGE:
                hooks = 1 << NF_BR_PRE_ROUTING;
@@ -308,6 +314,7 @@ static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
 
        return nft_chain_validate_hooks(ctx->chain, hooks);
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
 
 int nft_meta_set_init(const struct nft_ctx *ctx,
                      const struct nft_expr *expr,
@@ -327,15 +334,16 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
                len = sizeof(u8);
                break;
        case NFT_META_PKTTYPE:
-               err = nft_meta_set_init_pkttype(ctx);
-               if (err)
-                       return err;
                len = sizeof(u8);
                break;
        default:
                return -EOPNOTSUPP;
        }
 
+       err = nft_meta_set_validate(ctx, expr, NULL);
+       if (err < 0)
+               return err;
+
        priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
        err = nft_validate_register_load(priv->sreg, len);
        if (err < 0)
@@ -407,6 +415,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
        .init           = nft_meta_set_init,
        .destroy        = nft_meta_set_destroy,
        .dump           = nft_meta_set_dump,
+       .validate       = nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644 (file)
index 0000000..294745e
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+struct nft_ng_inc {
+       enum nft_registers      dreg:8;
+       u32                     until;
+       atomic_t                counter;
+};
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+                           struct nft_regs *regs,
+                           const struct nft_pktinfo *pkt)
+{
+       struct nft_ng_inc *priv = nft_expr_priv(expr);
+       u32 nval, oval;
+
+       do {
+               oval = atomic_read(&priv->counter);
+               nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+       } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+       regs->data[priv->dreg] = nval;
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+       [NFTA_NG_DREG]          = { .type = NLA_U32 },
+       [NFTA_NG_UNTIL]         = { .type = NLA_U32 },
+       [NFTA_NG_TYPE]          = { .type = NLA_U32 },
+};
+
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+       priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+       if (priv->until == 0)
+               return -ERANGE;
+
+       priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+       atomic_set(&priv->counter, 0);
+
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+                      u32 until, enum nft_ng_types type)
+{
+       if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+       return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+}
+
+struct nft_ng_random {
+       enum nft_registers      dreg:8;
+       u32                     until;
+};
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+                              struct nft_regs *regs,
+                              const struct nft_pktinfo *pkt)
+{
+       struct nft_ng_random *priv = nft_expr_priv(expr);
+       struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+
+       regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
+                                                 priv->until);
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+                             const struct nft_expr *expr,
+                             const struct nlattr * const tb[])
+{
+       struct nft_ng_random *priv = nft_expr_priv(expr);
+
+       priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
+       if (priv->until == 0)
+               return -ERANGE;
+
+       prandom_init_once(&nft_numgen_prandom_state);
+
+       priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+       return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+       .type           = &nft_ng_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+       .eval           = nft_ng_inc_eval,
+       .init           = nft_ng_inc_init,
+       .dump           = nft_ng_inc_dump,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+       .type           = &nft_ng_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+       .eval           = nft_ng_random_eval,
+       .init           = nft_ng_random_init,
+       .dump           = nft_ng_random_dump,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+       u32 type;
+
+       if (!tb[NFTA_NG_DREG]   ||
+           !tb[NFTA_NG_UNTIL]  ||
+           !tb[NFTA_NG_TYPE])
+               return ERR_PTR(-EINVAL);
+
+       type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+       switch (type) {
+       case NFT_NG_INCREMENTAL:
+               return &nft_ng_inc_ops;
+       case NFT_NG_RANDOM:
+               return &nft_ng_random_ops;
+       }
+
+       return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+       .name           = "numgen",
+       .select_ops     = &nft_ng_select_ops,
+       .policy         = nft_ng_policy,
+       .maxattr        = NFTA_NG_MAX,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+       return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+       nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
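
Both generators produce a value in [0, until): the random variant scales a draw from a per-cpu PRNG state, while the incremental variant advances a shared counter with a cmpxchg retry loop so that each packet, even under concurrency, obtains its own well-defined value. The wrap-around update as a standalone sketch:

	static u32 ng_inc_next(atomic_t *counter, u32 until)
	{
		u32 oval, nval;

		do {
			oval = atomic_read(counter);
			nval = (oval + 1 < until) ? oval + 1 : 0;
		} while (atomic_cmpxchg(counter, oval, nval) != oval);

		return nval;	/* the value this caller owns */
	}
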
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644 (file)
index 0000000..6eafbf9
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+       u64             quota;
+       bool            invert;
+       atomic64_t      remain;
+};
+
+static inline s64 nft_quota(struct nft_quota *priv,
+                           const struct nft_pktinfo *pkt)
+{
+       return atomic64_sub_return(pkt->skb->len, &priv->remain);
+}
+
+static void nft_quota_eval(const struct nft_expr *expr,
+                          struct nft_regs *regs,
+                          const struct nft_pktinfo *pkt)
+{
+       struct nft_quota *priv = nft_expr_priv(expr);
+
+       if ((nft_quota(priv, pkt) < 0) ^ priv->invert)
+               regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+       [NFTA_QUOTA_BYTES]      = { .type = NLA_U64 },
+       [NFTA_QUOTA_FLAGS]      = { .type = NLA_U32 },
+};
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+                         const struct nft_expr *expr,
+                         const struct nlattr * const tb[])
+{
+       struct nft_quota *priv = nft_expr_priv(expr);
+       u32 flags = 0;
+       u64 quota;
+
+       if (!tb[NFTA_QUOTA_BYTES])
+               return -EINVAL;
+
+       quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+       if (quota > S64_MAX)
+               return -EOVERFLOW;
+
+       if (tb[NFTA_QUOTA_FLAGS]) {
+               flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+               if (flags & ~NFT_QUOTA_F_INV)
+                       return -EINVAL;
+       }
+
+       priv->quota = quota;
+       priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+       atomic64_set(&priv->remain, quota);
+
+       return 0;
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_quota *priv = nft_expr_priv(expr);
+       u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+
+       if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+                        NFTA_QUOTA_PAD) ||
+           nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+       .type           = &nft_quota_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+       .eval           = nft_quota_eval,
+       .init           = nft_quota_init,
+       .dump           = nft_quota_dump,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+       .name           = "quota",
+       .ops            = &nft_quota_ops,
+       .policy         = nft_quota_policy,
+       .maxattr        = NFTA_QUOTA_MAX,
+       .flags          = NFT_EXPR_STATEFUL,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+       return nft_register_expr(&nft_quota_type);
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+       nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
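
The quota is tracked as a signed remainder: remain starts at the configured byte quota and each packet subtracts its length, so the quota is exceeded exactly when the running total crosses the limit. By default a packet beyond the quota gets NFT_BREAK (the rule stops matching); with NFT_QUOTA_F_INV the test is inverted and the rule only matches once the quota has been exceeded. The core check reduces to:

	/* remain was initialised to quota (quota <= S64_MAX) */
	if ((atomic64_sub_return(skb->len, &remain) < 0) ^ invert)
		regs->verdict.code = NFT_BREAK;	/* expression does not match */
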
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
deleted file mode 100644 (file)
index ffe9ae0..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-
-static DEFINE_SPINLOCK(nft_rbtree_lock);
-
-struct nft_rbtree {
-       struct rb_root          root;
-};
-
-struct nft_rbtree_elem {
-       struct rb_node          node;
-       struct nft_set_ext      ext;
-};
-
-static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
-{
-       return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
-              (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
-}
-
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
-                            const struct nft_rbtree_elem *interval)
-{
-       return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
-}
-
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
-                             const u32 *key, const struct nft_set_ext **ext)
-{
-       const struct nft_rbtree *priv = nft_set_priv(set);
-       const struct nft_rbtree_elem *rbe, *interval = NULL;
-       u8 genmask = nft_genmask_cur(net);
-       const struct rb_node *parent;
-       const void *this;
-       int d;
-
-       spin_lock_bh(&nft_rbtree_lock);
-       parent = priv->root.rb_node;
-       while (parent != NULL) {
-               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-
-               this = nft_set_ext_key(&rbe->ext);
-               d = memcmp(this, key, set->klen);
-               if (d < 0) {
-                       parent = parent->rb_left;
-                       /* In case of adjacent ranges, we always see the high
-                        * part of the range in first place, before the low one.
-                        * So don't update interval if the keys are equal.
-                        */
-                       if (interval && nft_rbtree_equal(set, this, interval))
-                               continue;
-                       interval = rbe;
-               } else if (d > 0)
-                       parent = parent->rb_right;
-               else {
-                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
-                               parent = parent->rb_left;
-                               continue;
-                       }
-                       if (nft_rbtree_interval_end(rbe))
-                               goto out;
-                       spin_unlock_bh(&nft_rbtree_lock);
-
-                       *ext = &rbe->ext;
-                       return true;
-               }
-       }
-
-       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
-           nft_set_elem_active(&interval->ext, genmask) &&
-           !nft_rbtree_interval_end(interval)) {
-               spin_unlock_bh(&nft_rbtree_lock);
-               *ext = &interval->ext;
-               return true;
-       }
-out:
-       spin_unlock_bh(&nft_rbtree_lock);
-       return false;
-}
-
-static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
-                              struct nft_rbtree_elem *new)
-{
-       struct nft_rbtree *priv = nft_set_priv(set);
-       u8 genmask = nft_genmask_next(net);
-       struct nft_rbtree_elem *rbe;
-       struct rb_node *parent, **p;
-       int d;
-
-       parent = NULL;
-       p = &priv->root.rb_node;
-       while (*p != NULL) {
-               parent = *p;
-               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-               d = memcmp(nft_set_ext_key(&rbe->ext),
-                          nft_set_ext_key(&new->ext),
-                          set->klen);
-               if (d < 0)
-                       p = &parent->rb_left;
-               else if (d > 0)
-                       p = &parent->rb_right;
-               else {
-                       if (nft_set_elem_active(&rbe->ext, genmask)) {
-                               if (nft_rbtree_interval_end(rbe) &&
-                                   !nft_rbtree_interval_end(new))
-                                       p = &parent->rb_left;
-                               else if (!nft_rbtree_interval_end(rbe) &&
-                                        nft_rbtree_interval_end(new))
-                                       p = &parent->rb_right;
-                               else
-                                       return -EEXIST;
-                       }
-               }
-       }
-       rb_link_node(&new->node, parent, p);
-       rb_insert_color(&new->node, &priv->root);
-       return 0;
-}
-
-static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
-                            const struct nft_set_elem *elem)
-{
-       struct nft_rbtree_elem *rbe = elem->priv;
-       int err;
-
-       spin_lock_bh(&nft_rbtree_lock);
-       err = __nft_rbtree_insert(net, set, rbe);
-       spin_unlock_bh(&nft_rbtree_lock);
-
-       return err;
-}
-
-static void nft_rbtree_remove(const struct nft_set *set,
-                             const struct nft_set_elem *elem)
-{
-       struct nft_rbtree *priv = nft_set_priv(set);
-       struct nft_rbtree_elem *rbe = elem->priv;
-
-       spin_lock_bh(&nft_rbtree_lock);
-       rb_erase(&rbe->node, &priv->root);
-       spin_unlock_bh(&nft_rbtree_lock);
-}
-
-static void nft_rbtree_activate(const struct net *net,
-                               const struct nft_set *set,
-                               const struct nft_set_elem *elem)
-{
-       struct nft_rbtree_elem *rbe = elem->priv;
-
-       nft_set_elem_change_active(net, set, &rbe->ext);
-}
-
-static void *nft_rbtree_deactivate(const struct net *net,
-                                  const struct nft_set *set,
-                                  const struct nft_set_elem *elem)
-{
-       const struct nft_rbtree *priv = nft_set_priv(set);
-       const struct rb_node *parent = priv->root.rb_node;
-       struct nft_rbtree_elem *rbe, *this = elem->priv;
-       u8 genmask = nft_genmask_next(net);
-       int d;
-
-       while (parent != NULL) {
-               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
-
-               d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
-                                          set->klen);
-               if (d < 0)
-                       parent = parent->rb_left;
-               else if (d > 0)
-                       parent = parent->rb_right;
-               else {
-                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
-                               parent = parent->rb_left;
-                               continue;
-                       }
-                       if (nft_rbtree_interval_end(rbe) &&
-                           !nft_rbtree_interval_end(this)) {
-                               parent = parent->rb_left;
-                               continue;
-                       } else if (!nft_rbtree_interval_end(rbe) &&
-                                  nft_rbtree_interval_end(this)) {
-                               parent = parent->rb_right;
-                               continue;
-                       }
-                       nft_set_elem_change_active(net, set, &rbe->ext);
-                       return rbe;
-               }
-       }
-       return NULL;
-}
-
-static void nft_rbtree_walk(const struct nft_ctx *ctx,
-                           const struct nft_set *set,
-                           struct nft_set_iter *iter)
-{
-       const struct nft_rbtree *priv = nft_set_priv(set);
-       struct nft_rbtree_elem *rbe;
-       struct nft_set_elem elem;
-       struct rb_node *node;
-
-       spin_lock_bh(&nft_rbtree_lock);
-       for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
-               rbe = rb_entry(node, struct nft_rbtree_elem, node);
-
-               if (iter->count < iter->skip)
-                       goto cont;
-               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
-                       goto cont;
-
-               elem.priv = rbe;
-
-               iter->err = iter->fn(ctx, set, iter, &elem);
-               if (iter->err < 0) {
-                       spin_unlock_bh(&nft_rbtree_lock);
-                       return;
-               }
-cont:
-               iter->count++;
-       }
-       spin_unlock_bh(&nft_rbtree_lock);
-}
-
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
-{
-       return sizeof(struct nft_rbtree);
-}
-
-static int nft_rbtree_init(const struct nft_set *set,
-                          const struct nft_set_desc *desc,
-                          const struct nlattr * const nla[])
-{
-       struct nft_rbtree *priv = nft_set_priv(set);
-
-       priv->root = RB_ROOT;
-       return 0;
-}
-
-static void nft_rbtree_destroy(const struct nft_set *set)
-{
-       struct nft_rbtree *priv = nft_set_priv(set);
-       struct nft_rbtree_elem *rbe;
-       struct rb_node *node;
-
-       while ((node = priv->root.rb_node) != NULL) {
-               rb_erase(node, &priv->root);
-               rbe = rb_entry(node, struct nft_rbtree_elem, node);
-               nft_set_elem_destroy(set, rbe);
-       }
-}
-
-static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
-                               struct nft_set_estimate *est)
-{
-       unsigned int nsize;
-
-       nsize = sizeof(struct nft_rbtree_elem);
-       if (desc->size)
-               est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
-       else
-               est->size = nsize;
-
-       est->class = NFT_SET_CLASS_O_LOG_N;
-
-       return true;
-}
-
-static struct nft_set_ops nft_rbtree_ops __read_mostly = {
-       .privsize       = nft_rbtree_privsize,
-       .elemsize       = offsetof(struct nft_rbtree_elem, ext),
-       .estimate       = nft_rbtree_estimate,
-       .init           = nft_rbtree_init,
-       .destroy        = nft_rbtree_destroy,
-       .insert         = nft_rbtree_insert,
-       .remove         = nft_rbtree_remove,
-       .deactivate     = nft_rbtree_deactivate,
-       .activate       = nft_rbtree_activate,
-       .lookup         = nft_rbtree_lookup,
-       .walk           = nft_rbtree_walk,
-       .features       = NFT_SET_INTERVAL | NFT_SET_MAP,
-       .owner          = THIS_MODULE,
-};
-
-static int __init nft_rbtree_module_init(void)
-{
-       return nft_register_set(&nft_rbtree_ops);
-}
-
-static void __exit nft_rbtree_module_exit(void)
-{
-       nft_unregister_set(&nft_rbtree_ops);
-}
-
-module_init(nft_rbtree_module_init);
-module_exit(nft_rbtree_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
index 0522fc9..c64de3f 100644
@@ -26,11 +26,27 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
 };
 EXPORT_SYMBOL_GPL(nft_reject_policy);
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+                       const struct nft_expr *expr,
+                       const struct nft_data **data)
+{
+       return nft_chain_validate_hooks(ctx->chain,
+                                       (1 << NF_INET_LOCAL_IN) |
+                                       (1 << NF_INET_FORWARD) |
+                                       (1 << NF_INET_LOCAL_OUT));
+}
+EXPORT_SYMBOL_GPL(nft_reject_validate);
+
 int nft_reject_init(const struct nft_ctx *ctx,
                    const struct nft_expr *expr,
                    const struct nlattr * const tb[])
 {
        struct nft_reject *priv = nft_expr_priv(expr);
+       int err;
+
+       err = nft_reject_validate(ctx, expr, NULL);
+       if (err < 0)
+               return err;
 
        if (tb[NFTA_REJECT_TYPE] == NULL)
                return -EINVAL;
index 759ca52..e79d9ca 100644
@@ -66,7 +66,11 @@ static int nft_reject_inet_init(const struct nft_ctx *ctx,
                                const struct nlattr * const tb[])
 {
        struct nft_reject *priv = nft_expr_priv(expr);
-       int icmp_code;
+       int icmp_code, err;
+
+       err = nft_reject_validate(ctx, expr, NULL);
+       if (err < 0)
+               return err;
 
        if (tb[NFTA_REJECT_TYPE] == NULL)
                return -EINVAL;
@@ -124,6 +128,7 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
        .eval           = nft_reject_inet_eval,
        .init           = nft_reject_inet_init,
        .dump           = nft_reject_inet_dump,
+       .validate       = nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_inet_type __read_mostly = {
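The new nft_reject_validate() confines reject expressions to the input, forward, and output hooks by passing nft_chain_validate_hooks() a bitmask with one bit per permitted hook. A minimal user-space sketch of the same bitmask test (the enum mirrors the NF_INET_* hook numbering; reject_allowed() is an illustrative helper, not a kernel function):

#include <stdbool.h>

enum {
	NF_INET_PRE_ROUTING,
	NF_INET_LOCAL_IN,
	NF_INET_FORWARD,
	NF_INET_LOCAL_OUT,
	NF_INET_POST_ROUTING,
};

/* True if a reject expression may live in a chain on this hook. */
static bool reject_allowed(unsigned int hooknum)
{
	unsigned int allowed = (1 << NF_INET_LOCAL_IN) |
			       (1 << NF_INET_FORWARD) |
			       (1 << NF_INET_LOCAL_OUT);

	return (1 << hooknum) & allowed;
}
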
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 0000000..3794cb2
--- /dev/null
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* We target a hash table size of 4; the element hint is 75% of the final size */
+#define NFT_HASH_ELEMENT_HINT 3
+
+struct nft_hash {
+       struct rhashtable               ht;
+       struct delayed_work             gc_work;
+};
+
+struct nft_hash_elem {
+       struct rhash_head               node;
+       struct nft_set_ext              ext;
+};
+
+struct nft_hash_cmp_arg {
+       const struct nft_set            *set;
+       const u32                       *key;
+       u8                              genmask;
+};
+
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+       const struct nft_hash_cmp_arg *arg = data;
+
+       return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+{
+       const struct nft_hash_elem *he = data;
+
+       return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+                              const void *ptr)
+{
+       const struct nft_hash_cmp_arg *x = arg->key;
+       const struct nft_hash_elem *he = ptr;
+
+       if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+               return 1;
+       if (nft_set_elem_expired(&he->ext))
+               return 1;
+       if (!nft_set_elem_active(&he->ext, x->genmask))
+               return 1;
+       return 0;
+}
+
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+                           const u32 *key, const struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       const struct nft_hash_elem *he;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_cur(net),
+               .set     = set,
+               .key     = key,
+       };
+
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       if (he != NULL)
+               *ext = &he->ext;
+
+       return !!he;
+}
+
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+                           void *(*new)(struct nft_set *,
+                                        const struct nft_expr *,
+                                        struct nft_regs *regs),
+                           const struct nft_expr *expr,
+                           struct nft_regs *regs,
+                           const struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = NFT_GENMASK_ANY,
+               .set     = set,
+               .key     = key,
+       };
+
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       if (he != NULL)
+               goto out;
+
+       he = new(set, expr, regs);
+       if (he == NULL)
+               goto err1;
+       if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+                                        nft_hash_params))
+               goto err2;
+out:
+       *ext = &he->ext;
+       return true;
+
+err2:
+       nft_set_elem_destroy(set, he);
+err1:
+       return false;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+                          const struct nft_set_elem *elem,
+                          struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he = elem->priv;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_next(net),
+               .set     = set,
+               .key     = elem->key.val.data,
+       };
+       struct nft_hash_elem *prev;
+
+       prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+                                              nft_hash_params);
+       if (IS_ERR(prev))
+               return PTR_ERR(prev);
+       if (prev) {
+               *ext = &prev->ext;
+               return -EEXIST;
+       }
+       return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+                             const struct nft_set_elem *elem)
+{
+       struct nft_hash_elem *he = elem->priv;
+
+       nft_set_elem_change_active(net, set, &he->ext);
+       nft_set_elem_clear_busy(&he->ext);
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                const struct nft_set_elem *elem)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he;
+       struct nft_hash_cmp_arg arg = {
+               .genmask = nft_genmask_next(net),
+               .set     = set,
+               .key     = elem->key.val.data,
+       };
+
+       rcu_read_lock();
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       if (he != NULL) {
+               if (!nft_set_elem_mark_busy(&he->ext) ||
+                   !nft_is_active(net, &he->ext))
+                       nft_set_elem_change_active(net, set, &he->ext);
+               else
+                       he = NULL;
+       }
+       rcu_read_unlock();
+
+       return he;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+                           const struct nft_set_elem *elem)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he = elem->priv;
+
+       rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+                         struct nft_set_iter *iter)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *he;
+       struct rhashtable_iter hti;
+       struct nft_set_elem elem;
+       int err;
+
+       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+       iter->err = err;
+       if (err)
+               return;
+
+       err = rhashtable_walk_start(&hti);
+       if (err && err != -EAGAIN) {
+               iter->err = err;
+               goto out;
+       }
+
+       while ((he = rhashtable_walk_next(&hti))) {
+               if (IS_ERR(he)) {
+                       err = PTR_ERR(he);
+                       if (err != -EAGAIN) {
+                               iter->err = err;
+                               goto out;
+                       }
+
+                       continue;
+               }
+
+               if (iter->count < iter->skip)
+                       goto cont;
+               if (nft_set_elem_expired(&he->ext))
+                       goto cont;
+               if (!nft_set_elem_active(&he->ext, iter->genmask))
+                       goto cont;
+
+               elem.priv = he;
+
+               iter->err = iter->fn(ctx, set, iter, &elem);
+               if (iter->err < 0)
+                       goto out;
+
+cont:
+               iter->count++;
+       }
+
+out:
+       rhashtable_walk_stop(&hti);
+       rhashtable_walk_exit(&hti);
+}
+
+static void nft_hash_gc(struct work_struct *work)
+{
+       struct nft_set *set;
+       struct nft_hash_elem *he;
+       struct nft_hash *priv;
+       struct nft_set_gc_batch *gcb = NULL;
+       struct rhashtable_iter hti;
+       int err;
+
+       priv = container_of(work, struct nft_hash, gc_work.work);
+       set  = nft_set_container_of(priv);
+
+       err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+       if (err)
+               goto schedule;
+
+       err = rhashtable_walk_start(&hti);
+       if (err && err != -EAGAIN)
+               goto out;
+
+       while ((he = rhashtable_walk_next(&hti))) {
+               if (IS_ERR(he)) {
+                       if (PTR_ERR(he) != -EAGAIN)
+                               goto out;
+                       continue;
+               }
+
+               if (!nft_set_elem_expired(&he->ext))
+                       continue;
+               if (nft_set_elem_mark_busy(&he->ext))
+                       continue;
+
+               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+               if (gcb == NULL)
+                       goto out;
+               rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+               atomic_dec(&set->nelems);
+               nft_set_gc_batch_add(gcb, he);
+       }
+out:
+       rhashtable_walk_stop(&hti);
+       rhashtable_walk_exit(&hti);
+
+       nft_set_gc_batch_complete(gcb);
+schedule:
+       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                          nft_set_gc_interval(set));
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+       return sizeof(struct nft_hash);
+}
+
+static const struct rhashtable_params nft_hash_params = {
+       .head_offset            = offsetof(struct nft_hash_elem, node),
+       .hashfn                 = nft_hash_key,
+       .obj_hashfn             = nft_hash_obj,
+       .obj_cmpfn              = nft_hash_cmp,
+       .automatic_shrinking    = true,
+};
+
+static int nft_hash_init(const struct nft_set *set,
+                        const struct nft_set_desc *desc,
+                        const struct nlattr * const tb[])
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct rhashtable_params params = nft_hash_params;
+       int err;
+
+       params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+       params.key_len    = set->klen;
+
+       err = rhashtable_init(&priv->ht, &params);
+       if (err < 0)
+               return err;
+
+       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+       if (set->flags & NFT_SET_TIMEOUT)
+               queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                                  nft_set_gc_interval(set));
+       return 0;
+}
+
+static void nft_hash_elem_destroy(void *ptr, void *arg)
+{
+       nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+
+       cancel_delayed_work_sync(&priv->gc_work);
+       rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+                                   (void *)set);
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+                             struct nft_set_estimate *est)
+{
+       unsigned int esize;
+
+       esize = sizeof(struct nft_hash_elem);
+       if (desc->size) {
+               est->size = sizeof(struct nft_hash) +
+                           roundup_pow_of_two(desc->size * 4 / 3) *
+                           sizeof(struct nft_hash_elem *) +
+                           desc->size * esize;
+       } else {
+               /* Resizing happens when the load drops below 30% or goes
+                * above 75%. The average of 52.5% load (approximated by 50%)
+                * is used for the size estimation of the hash buckets,
+                * meaning we calculate two buckets per element.
+                */
+               est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+       }
+
+       est->class = NFT_SET_CLASS_O_1;
+
+       return true;
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+       .privsize       = nft_hash_privsize,
+       .elemsize       = offsetof(struct nft_hash_elem, ext),
+       .estimate       = nft_hash_estimate,
+       .init           = nft_hash_init,
+       .destroy        = nft_hash_destroy,
+       .insert         = nft_hash_insert,
+       .activate       = nft_hash_activate,
+       .deactivate     = nft_hash_deactivate,
+       .remove         = nft_hash_remove,
+       .lookup         = nft_hash_lookup,
+       .update         = nft_hash_update,
+       .walk           = nft_hash_walk,
+       .features       = NFT_SET_MAP | NFT_SET_TIMEOUT,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+       return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+       nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
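nft_hash_estimate() sizes the table for a 75% load factor: with a declared set size, the bucket count is desc->size * 4 / 3 rounded up to a power of two, plus one element structure per entry. A standalone sketch of that arithmetic (the struct sizes are assumed 64-bit example values, and roundup_pow_of_two() is reimplemented here rather than taken from log2.h):

#include <stdio.h>

/* Stand-in for the kernel's roundup_pow_of_two() from log2.h. */
static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long desc_size = 1000; /* declared set size */
	unsigned long esize = 48;       /* assumed sizeof(struct nft_hash_elem) */
	unsigned long priv = 96;        /* assumed sizeof(struct nft_hash) */
	unsigned long buckets = roundup_pow_of_two(desc_size * 4 / 3);
	unsigned long total = priv + buckets * sizeof(void *) +
			      desc_size * esize;

	printf("%lu buckets, ~%lu bytes total\n", buckets, total);
	return 0;
}

For a set of 1000 elements this provisions 2048 buckets, matching the 75% target load.
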
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
new file mode 100644
index 0000000..38b5bda
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+static DEFINE_SPINLOCK(nft_rbtree_lock);
+
+struct nft_rbtree {
+       struct rb_root          root;
+};
+
+struct nft_rbtree_elem {
+       struct rb_node          node;
+       struct nft_set_ext      ext;
+};
+
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+       return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+              (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+                            const struct nft_rbtree_elem *interval)
+{
+       return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
+
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+                             const u32 *key, const struct nft_set_ext **ext)
+{
+       const struct nft_rbtree *priv = nft_set_priv(set);
+       const struct nft_rbtree_elem *rbe, *interval = NULL;
+       u8 genmask = nft_genmask_cur(net);
+       const struct rb_node *parent;
+       const void *this;
+       int d;
+
+       spin_lock_bh(&nft_rbtree_lock);
+       parent = priv->root.rb_node;
+       while (parent != NULL) {
+               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+               this = nft_set_ext_key(&rbe->ext);
+               d = memcmp(this, key, set->klen);
+               if (d < 0) {
+                       parent = parent->rb_left;
+                       /* For adjacent ranges we always see the high part
+                        * of the range first, before the low one, so don't
+                        * update the interval if the keys are equal.
+                        */
+                       if (interval && nft_rbtree_equal(set, this, interval))
+                               continue;
+                       interval = rbe;
+               } else if (d > 0)
+                       parent = parent->rb_right;
+               else {
+                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = parent->rb_left;
+                               continue;
+                       }
+                       if (nft_rbtree_interval_end(rbe))
+                               goto out;
+                       spin_unlock_bh(&nft_rbtree_lock);
+
+                       *ext = &rbe->ext;
+                       return true;
+               }
+       }
+
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
+           !nft_rbtree_interval_end(interval)) {
+               spin_unlock_bh(&nft_rbtree_lock);
+               *ext = &interval->ext;
+               return true;
+       }
+out:
+       spin_unlock_bh(&nft_rbtree_lock);
+       return false;
+}
+
+static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+                              struct nft_rbtree_elem *new,
+                              struct nft_set_ext **ext)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+       u8 genmask = nft_genmask_next(net);
+       struct nft_rbtree_elem *rbe;
+       struct rb_node *parent, **p;
+       int d;
+
+       parent = NULL;
+       p = &priv->root.rb_node;
+       while (*p != NULL) {
+               parent = *p;
+               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+               d = memcmp(nft_set_ext_key(&rbe->ext),
+                          nft_set_ext_key(&new->ext),
+                          set->klen);
+               if (d < 0)
+                       p = &parent->rb_left;
+               else if (d > 0)
+                       p = &parent->rb_right;
+               else {
+                       if (nft_set_elem_active(&rbe->ext, genmask)) {
+                               if (nft_rbtree_interval_end(rbe) &&
+                                   !nft_rbtree_interval_end(new))
+                                       p = &parent->rb_left;
+                               else if (!nft_rbtree_interval_end(rbe) &&
+                                        nft_rbtree_interval_end(new))
+                                       p = &parent->rb_right;
+                               else {
+                                       *ext = &rbe->ext;
+                                       return -EEXIST;
+                               }
+                       }
+               }
+       }
+       rb_link_node(&new->node, parent, p);
+       rb_insert_color(&new->node, &priv->root);
+       return 0;
+}
+
+static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
+                            const struct nft_set_elem *elem,
+                            struct nft_set_ext **ext)
+{
+       struct nft_rbtree_elem *rbe = elem->priv;
+       int err;
+
+       spin_lock_bh(&nft_rbtree_lock);
+       err = __nft_rbtree_insert(net, set, rbe, ext);
+       spin_unlock_bh(&nft_rbtree_lock);
+
+       return err;
+}
+
+static void nft_rbtree_remove(const struct nft_set *set,
+                             const struct nft_set_elem *elem)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree_elem *rbe = elem->priv;
+
+       spin_lock_bh(&nft_rbtree_lock);
+       rb_erase(&rbe->node, &priv->root);
+       spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static void nft_rbtree_activate(const struct net *net,
+                               const struct nft_set *set,
+                               const struct nft_set_elem *elem)
+{
+       struct nft_rbtree_elem *rbe = elem->priv;
+
+       nft_set_elem_change_active(net, set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct net *net,
+                                  const struct nft_set *set,
+                                  const struct nft_set_elem *elem)
+{
+       const struct nft_rbtree *priv = nft_set_priv(set);
+       const struct rb_node *parent = priv->root.rb_node;
+       struct nft_rbtree_elem *rbe, *this = elem->priv;
+       u8 genmask = nft_genmask_next(net);
+       int d;
+
+       while (parent != NULL) {
+               rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+
+               d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+                                          set->klen);
+               if (d < 0)
+                       parent = parent->rb_left;
+               else if (d > 0)
+                       parent = parent->rb_right;
+               else {
+                       if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = parent->rb_left;
+                               continue;
+                       }
+                       if (nft_rbtree_interval_end(rbe) &&
+                           !nft_rbtree_interval_end(this)) {
+                               parent = parent->rb_left;
+                               continue;
+                       } else if (!nft_rbtree_interval_end(rbe) &&
+                                  nft_rbtree_interval_end(this)) {
+                               parent = parent->rb_right;
+                               continue;
+                       }
+                       nft_set_elem_change_active(net, set, &rbe->ext);
+                       return rbe;
+               }
+       }
+       return NULL;
+}
+
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+                           const struct nft_set *set,
+                           struct nft_set_iter *iter)
+{
+       const struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree_elem *rbe;
+       struct nft_set_elem elem;
+       struct rb_node *node;
+
+       spin_lock_bh(&nft_rbtree_lock);
+       for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
+               if (iter->count < iter->skip)
+                       goto cont;
+               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
+                       goto cont;
+
+               elem.priv = rbe;
+
+               iter->err = iter->fn(ctx, set, iter, &elem);
+               if (iter->err < 0) {
+                       spin_unlock_bh(&nft_rbtree_lock);
+                       return;
+               }
+cont:
+               iter->count++;
+       }
+       spin_unlock_bh(&nft_rbtree_lock);
+}
+
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+{
+       return sizeof(struct nft_rbtree);
+}
+
+static int nft_rbtree_init(const struct nft_set *set,
+                          const struct nft_set_desc *desc,
+                          const struct nlattr * const nla[])
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+
+       priv->root = RB_ROOT;
+       return 0;
+}
+
+static void nft_rbtree_destroy(const struct nft_set *set)
+{
+       struct nft_rbtree *priv = nft_set_priv(set);
+       struct nft_rbtree_elem *rbe;
+       struct rb_node *node;
+
+       while ((node = priv->root.rb_node) != NULL) {
+               rb_erase(node, &priv->root);
+               rbe = rb_entry(node, struct nft_rbtree_elem, node);
+               nft_set_elem_destroy(set, rbe);
+       }
+}
+
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+                               struct nft_set_estimate *est)
+{
+       unsigned int nsize;
+
+       nsize = sizeof(struct nft_rbtree_elem);
+       if (desc->size)
+               est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+       else
+               est->size = nsize;
+
+       est->class = NFT_SET_CLASS_O_LOG_N;
+
+       return true;
+}
+
+static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+       .privsize       = nft_rbtree_privsize,
+       .elemsize       = offsetof(struct nft_rbtree_elem, ext),
+       .estimate       = nft_rbtree_estimate,
+       .init           = nft_rbtree_init,
+       .destroy        = nft_rbtree_destroy,
+       .insert         = nft_rbtree_insert,
+       .remove         = nft_rbtree_remove,
+       .deactivate     = nft_rbtree_deactivate,
+       .activate       = nft_rbtree_activate,
+       .lookup         = nft_rbtree_lookup,
+       .walk           = nft_rbtree_walk,
+       .features       = NFT_SET_INTERVAL | NFT_SET_MAP,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_rbtree_module_init(void)
+{
+       return nft_register_set(&nft_rbtree_ops);
+}
+
+static void __exit nft_rbtree_module_exit(void)
+{
+       nft_unregister_set(&nft_rbtree_ops);
+}
+
+module_init(nft_rbtree_module_init);
+module_exit(nft_rbtree_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
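The rbtree set encodes an interval [a, b) as two nodes: a start element keyed on a and an end element keyed on b carrying NFT_SET_ELEM_INTERVAL_END; lookup remembers the closest element at or below the key and reports a match only if that element is a start. A user-space sketch of that membership test over a flat array (illustrative types; the kernel's tie-breaking for adjacent ranges, which duplicate boundary keys, is elided here):

#include <stdbool.h>
#include <stddef.h>

struct elem {
	unsigned int key;
	bool interval_end; /* models NFT_SET_ELEM_INTERVAL_END */
};

/* Membership test: find the greatest key <= the lookup key and match
 * only if it marks the start of an interval. The linear scan stands in
 * for the rbtree descent.
 */
static bool interval_lookup(const struct elem *e, size_t n, unsigned int key)
{
	const struct elem *best = NULL;
	size_t i;

	for (i = 0; i < n; i++)
		if (e[i].key <= key && (!best || e[i].key > best->key))
			best = &e[i];

	return best && !best->interval_end;
}
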
index 7f4414d..663c4c3 100644
@@ -127,6 +127,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
                                                    daddr, dport,
                                                    in->ifindex);
 
+                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                               sk = NULL;
                        /* NOTE: we return listeners even if bound to
                         * 0.0.0.0, those are filtered out in
                         * xt_socket, since xt_TPROXY needs 0 bound
@@ -195,6 +197,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
                                                   daddr, ntohs(dport),
                                                   in->ifindex);
 
+                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                               sk = NULL;
                        /* NOTE: we return listeners even if bound to
                         * 0.0.0.0, those are filtered out in
                         * xt_socket, since xt_TPROXY needs 0 bound
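The TPROXY hunks take a socket reference only when the socket is still live: atomic_inc_not_zero() bumps sk_refcnt unless it has already dropped to zero, so a socket racing toward destruction is treated as no match. The same pattern in portable C11 atomics (try_get_ref() is an illustrative name):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the count is nonzero, as
 * atomic_inc_not_zero() does for sk_refcnt above.
 */
static bool try_get_ref(atomic_int *refcnt)
{
	int old = atomic_load(refcnt);

	while (old != 0) {
		/* On failure, old is reloaded and the test repeats. */
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;
	}
	return false; /* object already being destroyed */
}
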
index 188404b..a3b8f69 100644
@@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
                return false;
 
        if (info->match_flags & XT_CONNTRACK_EXPIRES) {
-               unsigned long expires = 0;
+               unsigned long expires = nf_ct_expires(ct) / HZ;
 
-               if (timer_pending(&ct->timeout))
-                       expires = (ct->timeout.expires - jiffies) / HZ;
                if ((expires >= info->expires_min &&
                    expires <= info->expires_max) ^
                    !(info->invert_flags & XT_CONNTRACK_EXPIRES))
index 3048a7e..cf32759 100644
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        nfnl_acct_update(skb, info->nfacct);
 
-       overquota = nfnl_acct_overquota(skb, info->nfacct);
+       overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
 
        return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
index e5f1898..bb33598 100644
@@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
             info->invert & XT_PHYSDEV_OP_BRIDGED) &&
            par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
            (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
-               pr_info("using --physdev-out and --physdev-is-out are only"
-                       "supported in the FORWARD and POSTROUTING chains with"
+               pr_info("using --physdev-out and --physdev-is-out are only "
+                       "supported in the FORWARD and POSTROUTING chains with "
                        "bridged traffic.\n");
                if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
                        return -EINVAL;
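Adjacent C string literals concatenate with no separator, so a message split across source lines needs an explicit trailing space on each fragment; the hunk above restores the two spaces that were missing. For illustration:

const char *bad  = "only"  "supported"; /* concatenates to "onlysupported" */
const char *good = "only " "supported"; /* concatenates to "only supported" */
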
index 8dd836a..b2f0e98 100644
@@ -63,43 +63,74 @@ out_nlmsg_trim:
 static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                                int protocol, int s_num)
 {
+       struct rhashtable_iter *hti = (void *)cb->args[2];
        struct netlink_table *tbl = &nl_table[protocol];
-       struct rhashtable *ht = &tbl->hash;
-       const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
        struct net *net = sock_net(skb->sk);
        struct netlink_diag_req *req;
        struct netlink_sock *nlsk;
        struct sock *sk;
-       int ret = 0, num = 0, i;
+       int num = 2;
+       int ret = 0;
 
        req = nlmsg_data(cb->nlh);
 
-       for (i = 0; i < htbl->size; i++) {
-               struct rhash_head *pos;
+       if (s_num > 1)
+               goto mc_list;
 
-               rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
-                       sk = (struct sock *)nlsk;
+       num--;
 
-                       if (!net_eq(sock_net(sk), net))
-                               continue;
-                       if (num < s_num) {
-                               num++;
+       if (!hti) {
+               hti = kmalloc(sizeof(*hti), GFP_KERNEL);
+               if (!hti)
+                       return -ENOMEM;
+
+               cb->args[2] = (long)hti;
+       }
+
+       if (!s_num)
+               rhashtable_walk_enter(&tbl->hash, hti);
+
+       ret = rhashtable_walk_start(hti);
+       if (ret == -EAGAIN)
+               ret = 0;
+       if (ret)
+               goto stop;
+
+       while ((nlsk = rhashtable_walk_next(hti))) {
+               if (IS_ERR(nlsk)) {
+                       ret = PTR_ERR(nlsk);
+                       if (ret == -EAGAIN) {
+                               ret = 0;
                                continue;
                        }
+                       break;
+               }
 
-                       if (sk_diag_fill(sk, skb, req,
-                                        NETLINK_CB(cb->skb).portid,
-                                        cb->nlh->nlmsg_seq,
-                                        NLM_F_MULTI,
-                                        sock_i_ino(sk)) < 0) {
-                               ret = 1;
-                               goto done;
-                       }
+               sk = (struct sock *)nlsk;
 
-                       num++;
+               if (!net_eq(sock_net(sk), net))
+                       continue;
+
+               if (sk_diag_fill(sk, skb, req,
+                                NETLINK_CB(cb->skb).portid,
+                                cb->nlh->nlmsg_seq,
+                                NLM_F_MULTI,
+                                sock_i_ino(sk)) < 0) {
+                       ret = 1;
+                       break;
                }
        }
 
+stop:
+       rhashtable_walk_stop(hti);
+       if (ret)
+               goto done;
+
+       rhashtable_walk_exit(hti);
+       num++;
+
+mc_list:
+       read_lock(&nl_table_lock);
        sk_for_each_bound(sk, &tbl->mc_list) {
                if (sk_hashed(sk))
                        continue;
@@ -116,13 +147,14 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                                 NLM_F_MULTI,
                                 sock_i_ino(sk)) < 0) {
                        ret = 1;
-                       goto done;
+                       break;
                }
                num++;
        }
+       read_unlock(&nl_table_lock);
+
 done:
        cb->args[0] = num;
-       cb->args[1] = protocol;
 
        return ret;
 }
@@ -131,20 +163,20 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
        struct netlink_diag_req *req;
        int s_num = cb->args[0];
+       int err = 0;
 
        req = nlmsg_data(cb->nlh);
 
-       rcu_read_lock();
-       read_lock(&nl_table_lock);
-
        if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
                int i;
 
                for (i = cb->args[1]; i < MAX_LINKS; i++) {
-                       if (__netlink_diag_dump(skb, cb, i, s_num))
+                       err = __netlink_diag_dump(skb, cb, i, s_num);
+                       if (err)
                                break;
                        s_num = 0;
                }
+               cb->args[1] = i;
        } else {
                if (req->sdiag_protocol >= MAX_LINKS) {
                        read_unlock(&nl_table_lock);
@@ -152,13 +184,22 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
                        return -ENOENT;
                }
 
-               __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
+               err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
        }
 
-       read_unlock(&nl_table_lock);
-       rcu_read_unlock();
+       return err < 0 ? err : skb->len;
+}
+
+static int netlink_diag_dump_done(struct netlink_callback *cb)
+{
+       struct rhashtable_iter *hti = (void *)cb->args[2];
+
+       if (cb->args[0] == 1)
+               rhashtable_walk_exit(hti);
 
-       return skb->len;
+       kfree(hti);
+
+       return 0;
 }
 
 static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
@@ -172,6 +213,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
        if (h->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .dump = netlink_diag_dump,
+                       .done = netlink_diag_dump_done,
                };
                return netlink_dump_start(net->diag_nlsk, skb, h, &c);
        } else
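The diag dump now keeps a heap-allocated rhashtable_iter across netlink callbacks (stashed in cb->args[2]) and drives it with the enter/start/next/stop/exit sequence, so a multi-part dump can resume where it left off and tolerate concurrent resizes. A condensed sketch of that loop shape, assuming kernel context (walk_all() and process() are illustrative names; IS_ERR/PTR_ERR come from linux/err.h):

static int walk_all(struct rhashtable *ht, int (*process)(void *obj))
{
	struct rhashtable_iter hti;
	void *obj;
	int err;

	rhashtable_walk_enter(ht, &hti);
	err = rhashtable_walk_start(&hti);
	if (err == -EAGAIN)
		err = 0;
	if (err)
		goto out;

	while ((obj = rhashtable_walk_next(&hti))) {
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -EAGAIN)
				continue; /* table resized under us */
			err = PTR_ERR(obj);
			break;
		}
		err = process(obj);
		if (err)
			break;
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
	return err;
}
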
index a09132a..23cc126 100644
@@ -977,7 +977,7 @@ static int genl_ctrl_event(int event, struct genl_family *family,
        return 0;
 }
 
-static struct genl_ops genl_ctrl_ops[] = {
+static const struct genl_ops genl_ctrl_ops[] = {
        {
                .cmd            = CTRL_CMD_GETFAMILY,
                .doit           = ctrl_getfamily,
@@ -986,7 +986,7 @@ static struct genl_ops genl_ctrl_ops[] = {
        },
 };
 
-static struct genl_multicast_group genl_ctrl_groups[] = {
+static const struct genl_multicast_group genl_ctrl_groups[] = {
        { .name = "notify", },
 };
 
index 1ecbd77..863e992 100644
@@ -71,6 +71,8 @@ struct ovs_frag_data {
 static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
 
 #define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
 struct action_fifo {
        int head;
        int tail;
@@ -78,7 +80,12 @@ struct action_fifo {
        struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
 };
 
+struct recirc_keys {
+       struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
 static struct action_fifo __percpu *action_fifos;
+static struct recirc_keys __percpu *recirc_keys;
 static DEFINE_PER_CPU(int, exec_actions_level);
 
 static void action_fifo_init(struct action_fifo *fifo)
@@ -162,10 +169,16 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
        if (skb_cow_head(skb, MPLS_HLEN) < 0)
                return -ENOMEM;
 
+       if (!skb->inner_protocol) {
+               skb_set_inner_network_header(skb, skb->mac_len);
+               skb_set_inner_protocol(skb, skb->protocol);
+       }
+
        skb_push(skb, MPLS_HLEN);
        memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
                skb->mac_len);
        skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->mac_len);
 
        new_mpls_lse = (__be32 *)skb_mpls_header(skb);
        *new_mpls_lse = mpls->mpls_lse;
@@ -173,8 +186,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
        skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
        update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
-       if (!skb->inner_protocol)
-               skb_set_inner_protocol(skb, skb->protocol);
        skb->protocol = mpls->mpls_ethertype;
 
        invalidate_flow_key(key);
@@ -198,6 +209,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 
        __skb_pull(skb, MPLS_HLEN);
        skb_reset_mac_header(skb);
+       skb_set_network_header(skb, skb->mac_len);
 
        /* skb_mpls_header() is used to locate the ethertype
         * field correctly in the presence of VLAN tags.
@@ -241,20 +253,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
        int err;
 
        err = skb_vlan_pop(skb);
-       if (skb_vlan_tag_present(skb))
+       if (skb_vlan_tag_present(skb)) {
                invalidate_flow_key(key);
-       else
-               key->eth.tci = 0;
+       } else {
+               key->eth.vlan.tci = 0;
+               key->eth.vlan.tpid = 0;
+       }
        return err;
 }
 
 static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
                     const struct ovs_action_push_vlan *vlan)
 {
-       if (skb_vlan_tag_present(skb))
+       if (skb_vlan_tag_present(skb)) {
                invalidate_flow_key(key);
-       else
-               key->eth.tci = vlan->vlan_tci;
+       } else {
+               key->eth.vlan.tci = vlan->vlan_tci;
+               key->eth.vlan.tpid = vlan->vlan_tpid;
+       }
        return skb_vlan_push(skb, vlan->vlan_tpid,
                             ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 }
@@ -1011,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
                          const struct nlattr *a, int rem)
 {
        struct deferred_action *da;
+       int level;
 
        if (!is_flow_key_valid(key)) {
                int err;
@@ -1034,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
                        return 0;
        }
 
+       level = this_cpu_read(exec_actions_level);
+       if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+               struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
+               struct sw_flow_key *recirc_key = &rks->key[level - 1];
+
+               *recirc_key = *key;
+               recirc_key->recirc_id = nla_get_u32(a);
+               ovs_dp_process_packet(skb, recirc_key);
+
+               return 0;
+       }
+
        da = add_deferred_actions(skb, key, NULL);
        if (da) {
                da->pkt_key.recirc_id = nla_get_u32(a);
@@ -1200,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct sw_flow_actions *acts,
                        struct sw_flow_key *key)
 {
-       static const int ovs_recursion_limit = 5;
        int err, level;
 
        level = __this_cpu_inc_return(exec_actions_level);
-       if (unlikely(level > ovs_recursion_limit)) {
+       if (unlikely(level > OVS_RECURSION_LIMIT)) {
                net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
                                     ovs_dp_name(dp));
                kfree_skb(skb);
@@ -1229,10 +1257,17 @@ int action_fifos_init(void)
        if (!action_fifos)
                return -ENOMEM;
 
+       recirc_keys = alloc_percpu(struct recirc_keys);
+       if (!recirc_keys) {
+               free_percpu(action_fifos);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
 void action_fifos_exit(void)
 {
        free_percpu(action_fifos);
+       free_percpu(recirc_keys);
 }
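With OVS_RECURSION_LIMIT at 5 the deferred-action threshold is 3, so recirculation at levels 1 through 3 copies the flow key into a preallocated per-CPU slot (key[level - 1]) and calls back into ovs_dp_process_packet() directly; only deeper levels fall back to the deferred-action FIFO. A sketch of the slot selection (flattened illustrative types; the kernel keeps the slots in per-CPU storage):

#define RECURSION_LIMIT           5
#define DEFERRED_ACTION_THRESHOLD (RECURSION_LIMIT - 2)

struct flow_key { unsigned int recirc_id; /* ... */ };

/* One preallocated key per direct-recursion level. */
static struct flow_key slots[DEFERRED_ACTION_THRESHOLD];

static struct flow_key *recirc_slot(int level)
{
	if (level <= DEFERRED_ACTION_THRESHOLD)
		return &slots[level - 1]; /* copy the key here and recurse */
	return NULL;                      /* too deep: defer instead */
}
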
index e054a74..31045ef 100644
@@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
        if (ct_info->helper)
                module_put(ct_info->helper->me);
        if (ct_info->ct)
-               nf_ct_put(ct_info->ct);
+               nf_ct_tmpl_free(ct_info->ct);
 }
 
 void ovs_ct_init(struct net *net)
index 524c0fd..0536ab3 100644
@@ -2437,3 +2437,7 @@ module_exit(dp_cleanup);
 
 MODULE_DESCRIPTION("Open vSwitch switching datapath");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
index 0ea128e..1240ae3 100644
@@ -302,24 +302,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
                                  sizeof(struct icmp6hdr));
 }
 
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+/**
+ * Parse the VLAN tag from a VLAN header.
+ * Returns -ENOMEM on memory error.
+ * Returns 0 if it encounters a non-VLAN or incomplete packet.
+ * Returns 1 after successfully parsing the VLAN tag.
+ */
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
 {
-       struct qtag_prefix {
-               __be16 eth_type; /* ETH_P_8021Q */
-               __be16 tci;
-       };
-       struct qtag_prefix *qp;
+       struct vlan_head *vh = (struct vlan_head *)skb->data;
 
-       if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+       if (likely(!eth_type_vlan(vh->tpid)))
                return 0;
 
-       if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
-                                        sizeof(__be16))))
+       if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
+               return 0;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
+                                sizeof(__be16))))
                return -ENOMEM;
 
-       qp = (struct qtag_prefix *) skb->data;
-       key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
-       __skb_pull(skb, sizeof(struct qtag_prefix));
+       vh = (struct vlan_head *)skb->data;
+       key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
+       key_vh->tpid = vh->tpid;
+
+       __skb_pull(skb, sizeof(struct vlan_head));
+       return 1;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+       int res;
+
+       key->eth.vlan.tci = 0;
+       key->eth.vlan.tpid = 0;
+       key->eth.cvlan.tci = 0;
+       key->eth.cvlan.tpid = 0;
+
+       if (likely(skb_vlan_tag_present(skb))) {
+               key->eth.vlan.tci = htons(skb->vlan_tci);
+               key->eth.vlan.tpid = skb->vlan_proto;
+       } else {
+               /* Parse outer vlan tag in the non-accelerated case. */
+               res = parse_vlan_tag(skb, &key->eth.vlan);
+               if (res <= 0)
+                       return res;
+       }
+
+       /* Parse inner vlan tag. */
+       res = parse_vlan_tag(skb, &key->eth.cvlan);
+       if (res <= 0)
+               return res;
 
        return 0;
 }
@@ -480,12 +513,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
         * update skb->csum here.
         */
 
-       key->eth.tci = 0;
-       if (skb_vlan_tag_present(skb))
-               key->eth.tci = htons(skb->vlan_tci);
-       else if (eth->h_proto == htons(ETH_P_8021Q))
-               if (unlikely(parse_vlan(skb, key)))
-                       return -ENOMEM;
+       if (unlikely(parse_vlan(skb, key)))
+               return -ENOMEM;
 
        key->eth.type = parse_ethertype(skb);
        if (unlikely(key->eth.type == htons(0)))
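key_extract() now pulls up to two stacked tags: the outer (service) tag, taken from offload metadata when the skb carries one, then the inner (customer) tag parsed from the packet itself. A user-space sketch of peeling stacked tags off a raw frame (types and constants are illustrative; the kernel additionally requires the following ethertype to be present before pulling):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define ETH_P_8021Q  0x8100
#define ETH_P_8021AD 0x88A8

struct vlan_tag { uint16_t tpid, tci; };

/* Consume up to two stacked tags from *p, returning how many were found. */
static int pull_vlans(const uint8_t **p, size_t *len, struct vlan_tag out[2])
{
	int n;

	for (n = 0; n < 2 && *len >= sizeof(struct vlan_tag); n++) {
		struct vlan_tag vt;

		memcpy(&vt, *p, sizeof(vt));
		if (ntohs(vt.tpid) != ETH_P_8021Q &&
		    ntohs(vt.tpid) != ETH_P_8021AD)
			break;
		out[n] = vt;
		*p += sizeof(vt);
		*len -= sizeof(vt);
	}
	return n;
}
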
index 03378e7..156a302 100644
@@ -50,6 +50,11 @@ struct ovs_tunnel_info {
        struct metadata_dst     *tun_dst;
 };
 
+struct vlan_head {
+       __be16 tpid; /* VLAN type: generally 802.1Q or 802.1ad. */
+       __be16 tci;  /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+};
+
 #define OVS_SW_FLOW_KEY_METADATA_SIZE                  \
        (offsetof(struct sw_flow_key, recirc_id) +      \
        FIELD_SIZEOF(struct sw_flow_key, recirc_id))
@@ -69,7 +74,8 @@ struct sw_flow_key {
        struct {
                u8     src[ETH_ALEN];   /* Ethernet source address. */
                u8     dst[ETH_ALEN];   /* Ethernet destination address. */
-               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+               struct vlan_head vlan;
+               struct vlan_head cvlan;
                __be16 type;            /* Ethernet frame type. */
        } eth;
        union {
index c78a6a1..8efa718 100644
@@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
                                  ip_tunnel_info_af(tun_info));
 }
 
+static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
+                                   const struct nlattr *a[],
+                                   bool is_mask, bool inner)
+{
+       __be16 tci = 0;
+       __be16 tpid = 0;
+
+       if (a[OVS_KEY_ATTR_VLAN])
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+       if (a[OVS_KEY_ATTR_ETHERTYPE])
+               tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+       if (likely(!inner)) {
+               SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
+               SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
+       } else {
+               SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
+               SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
+       }
+       return 0;
+}
+
+static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
+                                     u64 key_attrs, bool inner,
+                                     const struct nlattr **a, bool log)
+{
+       __be16 tci = 0;
+
+       if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+             (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+              eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
+               /* Not a VLAN. */
+               return 0;
+       }
+
+       if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+             (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+               OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
+               return -EINVAL;
+       }
+
+       if (a[OVS_KEY_ATTR_VLAN])
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+       if (!(tci & htons(VLAN_TAG_PRESENT))) {
+               if (tci) {
+                       OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
+                                 (inner) ? "C-VLAN" : "VLAN");
+                       return -EINVAL;
+               } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
+                       /* Corner case for truncated VLAN header. */
+                       OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
+                                 (inner) ? "C-VLAN" : "VLAN");
+                       return -EINVAL;
+               }
+       }
+
+       return 1;
+}
+
+static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
+                                          u64 key_attrs, bool inner,
+                                          const struct nlattr **a, bool log)
+{
+       __be16 tci = 0;
+       __be16 tpid = 0;
+       bool encap_valid = !!(match->key->eth.vlan.tci &
+                             htons(VLAN_TAG_PRESENT));
+       bool i_encap_valid = !!(match->key->eth.cvlan.tci &
+                               htons(VLAN_TAG_PRESENT));
+
+       if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
+               /* Not a VLAN. */
+               return 0;
+       }
+
+       if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
+               OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
+                         (inner) ? "C-VLAN" : "VLAN");
+               return -EINVAL;
+       }
+
+       if (a[OVS_KEY_ATTR_VLAN])
+               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+       if (a[OVS_KEY_ATTR_ETHERTYPE])
+               tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+       if (tpid != htons(0xffff)) {
+               OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
+                         (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
+               return -EINVAL;
+       }
+       if (!(tci & htons(VLAN_TAG_PRESENT))) {
+               OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
+                         (inner) ? "C-VLAN" : "VLAN");
+               return -EINVAL;
+       }
+
+       return 1;
+}
+
+static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
+                                    u64 *key_attrs, bool inner,
+                                    const struct nlattr **a, bool is_mask,
+                                    bool log)
+{
+       int err;
+       const struct nlattr *encap;
+
+       if (!is_mask)
+               err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
+                                                a, log);
+       else
+               err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
+                                                     a, log);
+       if (err <= 0)
+               return err;
+
+       err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
+       if (err)
+               return err;
+
+       *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+       *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+       *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+
+       encap = a[OVS_KEY_ATTR_ENCAP];
+
+       if (!is_mask)
+               err = parse_flow_nlattrs(encap, a, key_attrs, log);
+       else
+               err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
+
+       return err;
+}
+
+static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
+                                  u64 *key_attrs, const struct nlattr **a,
+                                  bool is_mask, bool log)
+{
+       int err;
+       bool encap_valid = false;
+
+       err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
+                                       is_mask, log);
+       if (err)
+               return err;
+
+       encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
+       if (encap_valid) {
+               err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
+                                               is_mask, log);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                 u64 *attrs, const struct nlattr **a,
                                 bool is_mask, bool log)
@@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
        }
 
        if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
-               __be16 tci;
-
-               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-               if (!(tci & htons(VLAN_TAG_PRESENT))) {
-                       if (is_mask)
-                               OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
-                       else
-                               OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
-
-                       return -EINVAL;
-               }
-
-               SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
-               attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+               /* VLAN attribute is always parsed before getting here since it
+                * may occur multiple times.
+                */
+               OVS_NLERR(log, "VLAN attribute unexpected.");
+               return -EINVAL;
        }
 
        if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
@@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
                      bool log)
 {
        const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-       const struct nlattr *encap;
        struct nlattr *newmask = NULL;
        u64 key_attrs = 0;
        u64 mask_attrs = 0;
-       bool encap_valid = false;
        int err;
 
        err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
        if (err)
                return err;
 
-       if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
-           (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
-           (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
-               __be16 tci;
-
-               if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
-                     (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
-                       OVS_NLERR(log, "Invalid Vlan frame.");
-                       return -EINVAL;
-               }
-
-               key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-               encap = a[OVS_KEY_ATTR_ENCAP];
-               key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
-               encap_valid = true;
-
-               if (tci & htons(VLAN_TAG_PRESENT)) {
-                       err = parse_flow_nlattrs(encap, a, &key_attrs, log);
-                       if (err)
-                               return err;
-               } else if (!tci) {
-                       /* Corner case for truncated 802.1Q header. */
-                       if (nla_len(encap)) {
-                               OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
-                               return -EINVAL;
-                       }
-               } else {
-                       OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
-                       return  -EINVAL;
-               }
-       }
+       err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
+       if (err)
+               return err;
 
        err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
        if (err)
@@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
                        goto free_newmask;
 
                /* Always match on tci. */
-               SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
-
-               if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
-                       __be16 eth_type = 0;
-                       __be16 tci = 0;
-
-                       if (!encap_valid) {
-                               OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
-                               err = -EINVAL;
-                               goto free_newmask;
-                       }
-
-                       mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
-                       if (a[OVS_KEY_ATTR_ETHERTYPE])
-                               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
-                       if (eth_type == htons(0xffff)) {
-                               mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-                               encap = a[OVS_KEY_ATTR_ENCAP];
-                               err = parse_flow_mask_nlattrs(encap, a,
-                                                             &mask_attrs, log);
-                               if (err)
-                                       goto free_newmask;
-                       } else {
-                               OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
-                                         ntohs(eth_type));
-                               err = -EINVAL;
-                               goto free_newmask;
-                       }
-
-                       if (a[OVS_KEY_ATTR_VLAN])
-                               tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+               SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
+               SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
 
-                       if (!(tci & htons(VLAN_TAG_PRESENT))) {
-                               OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
-                                         ntohs(tci));
-                               err = -EINVAL;
-                               goto free_newmask;
-                       }
-               }
+               err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
+               if (err)
+                       goto free_newmask;
 
                err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
                                           log);
@@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
        return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
 }
 
+static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
+                           bool is_mask)
+{
+       __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
+
+       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+           nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
+               return -EMSGSIZE;
+       return 0;
+}
+
 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
                             const struct sw_flow_key *output, bool is_mask,
                             struct sk_buff *skb)
 {
        struct ovs_key_ethernet *eth_key;
-       struct nlattr *nla, *encap;
+       struct nlattr *nla;
+       struct nlattr *encap = NULL;
+       struct nlattr *in_encap = NULL;
 
        if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
                goto nla_put_failure;
@@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
        ether_addr_copy(eth_key->eth_src, output->eth.src);
        ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 
-       if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
-               __be16 eth_type;
-               eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
-               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
-                   nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+       if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+               if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
                        goto nla_put_failure;
                encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-               if (!swkey->eth.tci)
+               if (!swkey->eth.vlan.tci)
                        goto unencap;
-       } else
-               encap = NULL;
+
+               if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+                       if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+                               goto nla_put_failure;
+                       in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+                       if (!swkey->eth.cvlan.tci)
+                               goto unencap;
+               }
+       }
 
        if (swkey->eth.type == htons(ETH_P_802_2)) {
                /*
@@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
                goto nla_put_failure;
 
+       if (eth_type_vlan(swkey->eth.type)) {
+               /* There are 3 (or more) VLAN tags; we don't know anything
+                * about the rest of the packet, so truncate here.
+                */
+               WARN_ON_ONCE(!(encap && in_encap));
+               goto unencap;
+       }
+
        if (swkey->eth.type == htons(ETH_P_IP)) {
                struct ovs_key_ipv4 *ipv4_key;
 
@@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
        }
 
 unencap:
+       if (in_encap)
+               nla_nest_end(skb, in_encap);
        if (encap)
                nla_nest_end(skb, encap);
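
With both nests in play, a double-tagged (QinQ) flow key serializes roughly as below. The attribute names and nesting follow the code above; the concrete TPID values (0x88a8 outer, 0x8100 inner) are the standard 802.1ad/802.1Q ones:

    OVS_KEY_ATTR_ETHERTYPE = 0x88a8            /* outer TPID (802.1ad) */
    OVS_KEY_ATTR_VLAN      = outer TCI
    OVS_KEY_ATTR_ENCAP {                       /* the "encap" nest */
            OVS_KEY_ATTR_ETHERTYPE = 0x8100    /* inner TPID (802.1Q) */
            OVS_KEY_ATTR_VLAN      = inner TCI
            OVS_KEY_ATTR_ENCAP {               /* the "in_encap" nest */
                    OVS_KEY_ATTR_ETHERTYPE = real ethertype, e.g. ETH_P_IP
                    /* L3+ key attributes follow here */
            }
    }

For masks the same shape is emitted with the TPID forced to an exact match (htons(0xffff)), as ovs_nla_put_vlan() above shows.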
 
@@ -2283,7 +2397,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
-                       if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+                       if (!eth_type_vlan(vlan->vlan_tpid))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
@@ -2388,7 +2502,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
        (*sfa)->orig_len = nla_len(attr);
        err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
-                                    key->eth.tci, log);
+                                    key->eth.vlan.tci, log);
        if (err)
                ovs_nla_free_flow_actions(*sfa);
 
index 6b21fd0..8f19843 100644
@@ -485,9 +485,14 @@ static unsigned int packet_length(const struct sk_buff *skb)
 {
        unsigned int length = skb->len - ETH_HLEN;
 
-       if (skb->protocol == htons(ETH_P_8021Q))
+       if (skb_vlan_tagged(skb))
                length -= VLAN_HLEN;
 
+       /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
+        * (ETH_HLEN + VLAN_HLEN) in addition to the mtu value, but almost none
+        * account for 802.1ad. e.g. is_skb_forwardable().
+        */
+
        return length;
 }
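
To make the comment concrete: on a 1500-byte-MTU port a single-tagged frame may arrive as 1500 + ETH_HLEN + VLAN_HLEN = 1518 bytes, and packet_length() reduces that back to 1500; a double-tagged 1522-byte frame only has one VLAN_HLEN subtracted, so it reports 1504 and relies on the driver slack described above. A standalone sketch of the arithmetic (the defines mirror the kernel's values):

#include <stdio.h>

#define ETH_HLEN  14            /* Ethernet header */
#define VLAN_HLEN  4            /* one VLAN tag */

static unsigned int packet_length(unsigned int skb_len, int nr_tags)
{
	unsigned int length = skb_len - ETH_HLEN;

	if (nr_tags)            /* at most one tag is ever subtracted */
		length -= VLAN_HLEN;
	return length;
}

int main(void)
{
	printf("%u\n", packet_length(1518, 1)); /* 1500: fits a 1500 MTU */
	printf("%u\n", packet_length(1522, 2)); /* 1504: needs driver slack */
	return 0;
}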
 
index 784c531..13396c7 100644
@@ -19,6 +19,13 @@ config AF_RXRPC
 
          See Documentation/networking/rxrpc.txt.
 
+config AF_RXRPC_IPV6
+       bool "IPv6 support for RxRPC"
+       depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+       help
+         Say Y here to allow AF_RXRPC to use IPv6 UDP as well as IPv4 UDP as
+         its network transport.
+
 
 config AF_RXRPC_DEBUG
        bool "RxRPC dynamic debugging"
index 10f3f48..8fc6ea3 100644
@@ -22,6 +22,7 @@ af-rxrpc-y := \
        peer_object.o \
        recvmsg.o \
        security.o \
+       sendmsg.o \
        skbuff.o \
        utils.o
 
index 88effad..09f81be 100644
 #include <linux/net.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
+#include <linux/random.h>
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/key-type.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
+#define CREATE_TRACE_POINTS
 #include "ar-internal.h"
 
 MODULE_DESCRIPTION("RxRPC network protocol");
@@ -104,19 +106,25 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
        case AF_INET:
                if (srx->transport_len < sizeof(struct sockaddr_in))
                        return -EINVAL;
-               _debug("INET: %x @ %pI4",
-                      ntohs(srx->transport.sin.sin_port),
-                      &srx->transport.sin.sin_addr);
                tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
                break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
        case AF_INET6:
+               if (srx->transport_len < sizeof(struct sockaddr_in6))
+                       return -EINVAL;
+               tail = offsetof(struct sockaddr_rxrpc, transport) +
+                       sizeof(struct sockaddr_in6);
+               break;
+#endif
+
        default:
                return -EAFNOSUPPORT;
        }
 
        if (tail < len)
                memset((void *)srx + tail, 0, len - tail);
+       _debug("INET: %pISp", &srx->transport);
        return 0;
 }
 
@@ -153,15 +161,15 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
        }
 
        if (rx->srx.srx_service) {
-               write_lock_bh(&local->services_lock);
-               list_for_each_entry(prx, &local->services, listen_link) {
+               write_lock(&local->services_lock);
+               hlist_for_each_entry(prx, &local->services, listen_link) {
                        if (prx->srx.srx_service == rx->srx.srx_service)
                                goto service_in_use;
                }
 
                rx->local = local;
-               list_add_tail(&rx->listen_link, &local->services);
-               write_unlock_bh(&local->services_lock);
+               hlist_add_head_rcu(&rx->listen_link, &local->services);
+               write_unlock(&local->services_lock);
 
                rx->sk.sk_state = RXRPC_SERVER_BOUND;
        } else {
@@ -174,7 +182,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
        return 0;
 
 service_in_use:
-       write_unlock_bh(&local->services_lock);
+       write_unlock(&local->services_lock);
        rxrpc_put_local(local);
        ret = -EADDRINUSE;
 error_unlock:
@@ -191,7 +199,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
 {
        struct sock *sk = sock->sk;
        struct rxrpc_sock *rx = rxrpc_sk(sk);
-       unsigned int max;
+       unsigned int max, old;
        int ret;
 
        _enter("%p,%d", rx, backlog);
@@ -210,9 +218,13 @@ static int rxrpc_listen(struct socket *sock, int backlog)
                        backlog = max;
                else if (backlog < 0 || backlog > max)
                        break;
+               old = sk->sk_max_ack_backlog;
                sk->sk_max_ack_backlog = backlog;
-               rx->sk.sk_state = RXRPC_SERVER_LISTENING;
-               ret = 0;
+               ret = rxrpc_service_prealloc(rx, GFP_KERNEL);
+               if (ret == 0)
+                       rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+               else
+                       sk->sk_max_ack_backlog = old;
                break;
        default:
                ret = -EBUSY;
@@ -230,6 +242,8 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  * @srx: The address of the peer to contact
  * @key: The security context to use (defaults to socket setting)
  * @user_call_ID: The ID to use
+ * @gfp: The allocation constraints
+ * @notify_rx: Where to send notifications instead of socket queue
  *
  * Allow a kernel service to begin a call on the nominated socket.  This just
  * sets up all the internal tracking structures and allocates connection and
@@ -242,7 +256,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
                                           struct sockaddr_rxrpc *srx,
                                           struct key *key,
                                           unsigned long user_call_ID,
-                                          gfp_t gfp)
+                                          gfp_t gfp,
+                                          rxrpc_notify_rx_t notify_rx)
 {
        struct rxrpc_conn_parameters cp;
        struct rxrpc_call *call;
@@ -269,6 +284,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
        cp.exclusive            = false;
        cp.service_id           = srx->srx_service;
        call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+       if (!IS_ERR(call))
+               call->notify_rx = notify_rx;
 
        release_sock(&rx->sk);
        _leave(" = %p", call);
@@ -278,40 +295,39 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call);
 
 /**
  * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @sock: The socket the call is on
  * @call: The call to end
  *
  * Allow a kernel service to end a call it was using.  The call must be
  * complete before this is called (the call should be aborted if necessary).
  */
-void rxrpc_kernel_end_call(struct rxrpc_call *call)
+void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 {
        _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
-       rxrpc_remove_user_ID(call->socket, call);
-       rxrpc_put_call(call);
+       rxrpc_release_call(rxrpc_sk(sock->sk), call);
+       rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
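
Taken together with rxrpc_kernel_begin_call() above, a kernel-service caller now looks roughly like this. This is a sketch only: the notify handler prototype is assumed from include/net/af_rxrpc.h, the helper names are hypothetical, and the setup of srx, key and user_call_ID is elided.

/* Hypothetical caller; only the rxrpc_kernel_*() signatures come from
 * this patch.
 */
static void my_notify_rx(struct sock *sk, struct rxrpc_call *call,
			 unsigned long user_call_ID)
{
	/* Called instead of queuing to the socket; typically queues work
	 * that drains the call from process context.
	 */
}

static int my_do_call(struct socket *sock, struct sockaddr_rxrpc *srx,
		      struct key *key, unsigned long user_call_ID)
{
	struct rxrpc_call *call;

	call = rxrpc_kernel_begin_call(sock, srx, key, user_call_ID,
				       GFP_KERNEL, my_notify_rx);
	if (IS_ERR(call))
		return PTR_ERR(call);

	/* ... transmit the request and wait for my_notify_rx() ... */

	rxrpc_kernel_end_call(sock, call);      /* call must be complete */
	return 0;
}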
 
 /**
- * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * rxrpc_kernel_new_call_notification - Get notifications of new calls
  * @sock: The socket to receive notifications on
- * @interceptor: The function to pass the messages to
+ * @notify_new_call: Function to be called when new calls appear
+ * @discard_new_call: Function to discard preallocated calls
  *
- * Allow a kernel service to intercept messages heading for the Rx queue on an
- * RxRPC socket.  They get passed to the specified function instead.
- * @interceptor should free the socket buffers it is given.  @interceptor is
- * called with the socket receive queue spinlock held and softirqs disabled -
- * this ensures that the messages will be delivered in the right order.
+ * Allow a kernel service to be given notifications about new calls.
  */
-void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
-                                       rxrpc_interceptor_t interceptor)
+void rxrpc_kernel_new_call_notification(
+       struct socket *sock,
+       rxrpc_notify_new_call_t notify_new_call,
+       rxrpc_discard_new_call_t discard_new_call)
 {
        struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 
-       _enter("");
-       rx->interceptor = interceptor;
+       rx->notify_new_call = notify_new_call;
+       rx->discard_new_call = discard_new_call;
 }
-
-EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
 
 /*
  * connect an RxRPC socket
@@ -391,6 +407,23 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 
        switch (rx->sk.sk_state) {
        case RXRPC_UNBOUND:
+               rx->srx.srx_family = AF_RXRPC;
+               rx->srx.srx_service = 0;
+               rx->srx.transport_type = SOCK_DGRAM;
+               rx->srx.transport.family = rx->family;
+               switch (rx->family) {
+               case AF_INET:
+                       rx->srx.transport_len = sizeof(struct sockaddr_in);
+                       break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+               case AF_INET6:
+                       rx->srx.transport_len = sizeof(struct sockaddr_in6);
+                       break;
+#endif
+               default:
+                       ret = -EAFNOSUPPORT;
+                       goto error_unlock;
+               }
                local = rxrpc_lookup_local(&rx->srx);
                if (IS_ERR(local)) {
                        ret = PTR_ERR(local);
@@ -505,15 +538,16 @@ error:
 static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
                               poll_table *wait)
 {
-       unsigned int mask;
        struct sock *sk = sock->sk;
+       struct rxrpc_sock *rx = rxrpc_sk(sk);
+       unsigned int mask;
 
        sock_poll_wait(file, sk_sleep(sk), wait);
        mask = 0;
 
        /* the socket is readable if there are any messages waiting on the Rx
         * queue */
-       if (!skb_queue_empty(&sk->sk_receive_queue))
+       if (!list_empty(&rx->recvmsg_q))
                mask |= POLLIN | POLLRDNORM;
 
        /* the socket is writable if there is space to add new data to the
@@ -540,7 +574,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
                return -EAFNOSUPPORT;
 
        /* we support transport protocol UDP/UDP6 only */
-       if (protocol != PF_INET)
+       if (protocol != PF_INET &&
+           !(IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol == PF_INET6))
                return -EPROTONOSUPPORT;
 
        if (sock->type != SOCK_DGRAM)
@@ -554,6 +589,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
                return -ENOMEM;
 
        sock_init_data(sock, sk);
+       sock_set_flag(sk, SOCK_RCU_FREE);
        sk->sk_state            = RXRPC_UNBOUND;
        sk->sk_write_space      = rxrpc_write_space;
        sk->sk_max_ack_backlog  = 0;
@@ -563,9 +599,12 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
        rx->family = protocol;
        rx->calls = RB_ROOT;
 
-       INIT_LIST_HEAD(&rx->listen_link);
-       INIT_LIST_HEAD(&rx->secureq);
-       INIT_LIST_HEAD(&rx->acceptq);
+       INIT_HLIST_NODE(&rx->listen_link);
+       spin_lock_init(&rx->incoming_lock);
+       INIT_LIST_HEAD(&rx->sock_calls);
+       INIT_LIST_HEAD(&rx->to_be_accepted);
+       INIT_LIST_HEAD(&rx->recvmsg_q);
+       rwlock_init(&rx->recvmsg_lock);
        rwlock_init(&rx->call_lock);
        memset(&rx->srx, 0, sizeof(rx->srx));
 
@@ -573,6 +612,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
        return 0;
 }
 
+/*
+ * Kill all the calls on a socket and shut it down.
+ */
+static int rxrpc_shutdown(struct socket *sock, int flags)
+{
+       struct sock *sk = sock->sk;
+       struct rxrpc_sock *rx = rxrpc_sk(sk);
+       int ret = 0;
+
+       _enter("%p,%d", sk, flags);
+
+       if (flags != SHUT_RDWR)
+               return -EOPNOTSUPP;
+       if (sk->sk_state == RXRPC_CLOSE)
+               return -ESHUTDOWN;
+
+       lock_sock(sk);
+
+       spin_lock_bh(&sk->sk_receive_queue.lock);
+       if (sk->sk_state < RXRPC_CLOSE) {
+               sk->sk_state = RXRPC_CLOSE;
+               sk->sk_shutdown = SHUTDOWN_MASK;
+       } else {
+               ret = -ESHUTDOWN;
+       }
+       spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+       rxrpc_discard_prealloc(rx);
+
+       release_sock(sk);
+       return ret;
+}
+
 /*
  * RxRPC socket destructor
  */
@@ -611,13 +683,14 @@ static int rxrpc_release_sock(struct sock *sk)
 
        ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
 
-       if (!list_empty(&rx->listen_link)) {
-               write_lock_bh(&rx->local->services_lock);
-               list_del(&rx->listen_link);
-               write_unlock_bh(&rx->local->services_lock);
+       if (!hlist_unhashed(&rx->listen_link)) {
+               write_lock(&rx->local->services_lock);
+               hlist_del_rcu(&rx->listen_link);
+               write_unlock(&rx->local->services_lock);
        }
 
        /* try to flush out this socket */
+       rxrpc_discard_prealloc(rx);
        rxrpc_release_calls_on_socket(rx);
        flush_workqueue(rxrpc_workqueue);
        rxrpc_purge_queue(&sk->sk_receive_queue);
@@ -666,7 +739,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
        .poll           = rxrpc_poll,
        .ioctl          = sock_no_ioctl,
        .listen         = rxrpc_listen,
-       .shutdown       = sock_no_shutdown,
+       .shutdown       = rxrpc_shutdown,
        .setsockopt     = rxrpc_setsockopt,
        .getsockopt     = sock_no_getsockopt,
        .sendmsg        = rxrpc_sendmsg,
@@ -697,7 +770,13 @@ static int __init af_rxrpc_init(void)
 
        BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
 
-       rxrpc_epoch = get_seconds();
+       get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
+       rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
+       get_random_bytes(&rxrpc_client_conn_ids.cur,
+                        sizeof(rxrpc_client_conn_ids.cur));
+       rxrpc_client_conn_ids.cur &= 0x3fffffff;
+       if (rxrpc_client_conn_ids.cur == 0)
+               rxrpc_client_conn_ids.cur = 1;
 
        ret = -ENOMEM;
        rxrpc_call_jar = kmem_cache_create(
index ff83fb1..e78c40b 100644
@@ -35,10 +35,22 @@ struct rxrpc_crypt {
 #define rxrpc_queue_delayed_work(WS,D) \
        queue_delayed_work(rxrpc_workqueue, (WS), (D))
 
-#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
-
 struct rxrpc_connection;
 
+/*
+ * Mark applied to socket buffers.
+ */
+enum rxrpc_skb_mark {
+       RXRPC_SKB_MARK_DATA,            /* data message */
+       RXRPC_SKB_MARK_FINAL_ACK,       /* final ACK received message */
+       RXRPC_SKB_MARK_BUSY,            /* server busy message */
+       RXRPC_SKB_MARK_REMOTE_ABORT,    /* remote abort message */
+       RXRPC_SKB_MARK_LOCAL_ABORT,     /* local abort message */
+       RXRPC_SKB_MARK_NET_ERROR,       /* network error message */
+       RXRPC_SKB_MARK_LOCAL_ERROR,     /* local error message */
+       RXRPC_SKB_MARK_NEW_CALL,        /* new incoming call notification */
+};
+
 /*
  * sk_state for RxRPC sockets
  */
@@ -51,20 +63,46 @@ enum {
        RXRPC_CLOSE,                    /* socket is being closed */
 };
 
+/*
+ * Service backlog preallocation.
+ *
+ * This contains circular buffers of preallocated peers, connections and calls
+ * for incoming service calls and their head and tail pointers.  This allows
+ * calls to be set up in the data_ready handler, thereby avoiding the need to
+ * shuffle packets around so much.
+ */
+struct rxrpc_backlog {
+       unsigned short          peer_backlog_head;
+       unsigned short          peer_backlog_tail;
+       unsigned short          conn_backlog_head;
+       unsigned short          conn_backlog_tail;
+       unsigned short          call_backlog_head;
+       unsigned short          call_backlog_tail;
+#define RXRPC_BACKLOG_MAX      32
+       struct rxrpc_peer       *peer_backlog[RXRPC_BACKLOG_MAX];
+       struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX];
+       struct rxrpc_call       *call_backlog[RXRPC_BACKLOG_MAX];
+};
+
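
The head/tail pairs implement simple power-of-two rings (RXRPC_BACKLOG_MAX is 32), filled from process context and consumed in the data_ready handler. A minimal consume-side sketch; the helper name is hypothetical, and the real cross-context handoff additionally needs memory barriers, omitted here:

/* Illustrative only: take one preallocated call off the backlog ring. */
static struct rxrpc_call *backlog_take_call(struct rxrpc_backlog *b)
{
	unsigned short head = b->call_backlog_head;
	unsigned short tail = b->call_backlog_tail;

	if (head == tail)
		return NULL;    /* ring empty: no call preallocated */

	b->call_backlog_tail = (tail + 1) & (RXRPC_BACKLOG_MAX - 1);
	return b->call_backlog[tail];
}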
 /*
  * RxRPC socket definition
  */
 struct rxrpc_sock {
        /* WARNING: sk has to be the first member */
        struct sock             sk;
-       rxrpc_interceptor_t     interceptor;    /* kernel service Rx interceptor function */
+       rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */
+       rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
        struct rxrpc_local      *local;         /* local endpoint */
-       struct list_head        listen_link;    /* link in the local endpoint's listen list */
-       struct list_head        secureq;        /* calls awaiting connection security clearance */
-       struct list_head        acceptq;        /* calls awaiting acceptance */
+       struct hlist_node       listen_link;    /* link in the local endpoint's listen list */
+       struct rxrpc_backlog    *backlog;       /* Preallocation for services */
+       spinlock_t              incoming_lock;  /* Incoming call vs service shutdown lock */
+       struct list_head        sock_calls;     /* List of calls owned by this socket */
+       struct list_head        to_be_accepted; /* calls awaiting acceptance */
+       struct list_head        recvmsg_q;      /* Calls awaiting recvmsg's attention  */
+       rwlock_t                recvmsg_lock;   /* Lock for recvmsg_q */
        struct key              *key;           /* security for this socket */
        struct key              *securities;    /* list of server security descriptors */
-       struct rb_root          calls;          /* outstanding calls on this socket */
+       struct rb_root          calls;          /* User ID -> call mapping */
        unsigned long           flags;
 #define RXRPC_SOCK_CONNECTED           0       /* connect_srx is set */
        rwlock_t                call_lock;      /* lock for calls */
@@ -103,13 +141,16 @@ struct rxrpc_host_header {
  * - max 48 bytes (struct sk_buff::cb)
  */
 struct rxrpc_skb_priv {
-       struct rxrpc_call       *call;          /* call with which associated */
-       unsigned long           resend_at;      /* time in jiffies at which to resend */
+       union {
+               unsigned long   resend_at;      /* time in jiffies at which to resend */
+               struct {
+                       u8      nr_jumbo;       /* Number of jumbo subpackets */
+               };
+       };
        union {
                unsigned int    offset;         /* offset into buffer of next read */
                int             remain;         /* amount of space remaining for next write */
                u32             error;          /* network error code */
-               bool            need_resend;    /* T if needs resending */
        };
 
        struct rxrpc_host_header hdr;           /* RxRPC packet header from this packet */
@@ -117,13 +158,6 @@ struct rxrpc_skb_priv {
 
 #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
 
-enum rxrpc_command {
-       RXRPC_CMD_SEND_DATA,            /* send data message */
-       RXRPC_CMD_SEND_ABORT,           /* request abort generation */
-       RXRPC_CMD_ACCEPT,               /* [server] accept incoming call */
-       RXRPC_CMD_REJECT_BUSY,          /* [server] reject a call as busy */
-};
-
 /*
  * RxRPC security module interface
  */
@@ -150,7 +184,12 @@ struct rxrpc_security {
                             void *);
 
        /* verify the security on a received packet */
-       int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
+       int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
+                            unsigned int, unsigned int, rxrpc_seq_t, u16);
+
+       /* Locate the data in a received packet that has been verified. */
+       void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
+                           unsigned int *, unsigned int *);
 
        /* issue a challenge */
        int (*issue_challenge)(struct rxrpc_connection *);
@@ -180,9 +219,8 @@ struct rxrpc_local {
        struct list_head        link;
        struct socket           *socket;        /* my UDP socket */
        struct work_struct      processor;
-       struct list_head        services;       /* services listening on this endpoint */
+       struct hlist_head       services;       /* services listening on this endpoint */
        struct rw_semaphore     defrag_sem;     /* control re-enablement of IP DF bit */
-       struct sk_buff_head     accept_queue;   /* incoming calls awaiting acceptance */
        struct sk_buff_head     reject_queue;   /* packets awaiting rejection */
        struct sk_buff_head     event_queue;    /* endpoint event packets awaiting processing */
        struct rb_root          client_conns;   /* Client connections by socket params */
@@ -255,6 +293,9 @@ enum rxrpc_conn_flag {
        RXRPC_CONN_HAS_IDR,             /* Has a client conn ID assigned */
        RXRPC_CONN_IN_SERVICE_CONNS,    /* Conn is in peer->service_conns */
        RXRPC_CONN_IN_CLIENT_CONNS,     /* Conn is in local->client_conns */
+       RXRPC_CONN_EXPOSED,             /* Conn has extra ref for exposure */
+       RXRPC_CONN_DONT_REUSE,          /* Don't reuse this connection */
+       RXRPC_CONN_COUNTED,             /* Counted by rxrpc_nr_client_conns */
 };
 
 /*
@@ -264,18 +305,29 @@ enum rxrpc_conn_event {
        RXRPC_CONN_EV_CHALLENGE,        /* Send challenge packet */
 };
 
+/*
+ * The connection cache state.
+ */
+enum rxrpc_conn_cache_state {
+       RXRPC_CONN_CLIENT_INACTIVE,     /* Conn is not yet listed */
+       RXRPC_CONN_CLIENT_WAITING,      /* Conn is on wait list, waiting for capacity */
+       RXRPC_CONN_CLIENT_ACTIVE,       /* Conn is on active list, doing calls */
+       RXRPC_CONN_CLIENT_CULLED,       /* Conn is culled and delisted, doing calls */
+       RXRPC_CONN_CLIENT_IDLE,         /* Conn is on idle list, doing mostly nothing */
+};
+
 /*
  * The connection protocol state.
  */
 enum rxrpc_conn_proto_state {
        RXRPC_CONN_UNUSED,              /* Connection not yet attempted */
        RXRPC_CONN_CLIENT,              /* Client connection */
+       RXRPC_CONN_SERVICE_PREALLOC,    /* Service connection preallocation */
        RXRPC_CONN_SERVICE_UNSECURED,   /* Service unsecured connection */
        RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
        RXRPC_CONN_SERVICE,             /* Service secured connection */
        RXRPC_CONN_REMOTELY_ABORTED,    /* Conn aborted by peer */
        RXRPC_CONN_LOCALLY_ABORTED,     /* Conn aborted locally */
-       RXRPC_CONN_NETWORK_ERROR,       /* Conn terminated by network error */
        RXRPC_CONN__NR_STATES
 };
 
@@ -288,23 +340,33 @@ struct rxrpc_connection {
        struct rxrpc_conn_proto proto;
        struct rxrpc_conn_parameters params;
 
-       spinlock_t              channel_lock;
+       atomic_t                usage;
+       struct rcu_head         rcu;
+       struct list_head        cache_link;
 
+       spinlock_t              channel_lock;
+       unsigned char           active_chans;   /* Mask of active channels */
+#define RXRPC_ACTIVE_CHANS_MASK        ((1 << RXRPC_MAXCALLS) - 1)
+       struct list_head        waiting_calls;  /* Calls waiting for channels */
        struct rxrpc_channel {
                struct rxrpc_call __rcu *call;          /* Active call */
                u32                     call_id;        /* ID of current call */
                u32                     call_counter;   /* Call ID counter */
                u32                     last_call;      /* ID of last call */
-               u32                     last_result;    /* Result of last call (0/abort) */
+               u8                      last_type;      /* Type of last packet */
+               u16                     last_service_id;
+               union {
+                       u32             last_seq;
+                       u32             last_abort;
+               };
        } channels[RXRPC_MAXCALLS];
-       wait_queue_head_t       channel_wq;     /* queue to wait for channel to become available */
 
-       struct rcu_head         rcu;
        struct work_struct      processor;      /* connection event processor */
        union {
                struct rb_node  client_node;    /* Node in local->client_conns */
                struct rb_node  service_node;   /* Node in peer->service_conns */
        };
+       struct list_head        proc_link;      /* link in procfs list */
        struct list_head        link;           /* link in master connection list */
        struct sk_buff_head     rx_queue;       /* received conn-level packets */
        const struct rxrpc_security *security;  /* applied security module */
@@ -313,17 +375,15 @@ struct rxrpc_connection {
        struct rxrpc_crypt      csum_iv;        /* packet checksum base */
        unsigned long           flags;
        unsigned long           events;
-       unsigned long           put_time;       /* Time at which last put */
+       unsigned long           idle_timestamp; /* Time at which last became idle */
        spinlock_t              state_lock;     /* state-change lock */
-       atomic_t                usage;
-       enum rxrpc_conn_proto_state state : 8;  /* current state of connection */
+       enum rxrpc_conn_cache_state cache_state;
+       enum rxrpc_conn_proto_state state;      /* current state of connection */
        u32                     local_abort;    /* local abort code */
        u32                     remote_abort;   /* remote abort code */
-       int                     error;          /* local error incurred */
        int                     debug_id;       /* debug ID for printks */
        atomic_t                serial;         /* packet serial number counter */
-       atomic_t                hi_serial;      /* highest serial number received */
-       atomic_t                avail_chans;    /* number of channels available */
+       unsigned int            hi_serial;      /* highest serial number received */
        u8                      size_align;     /* data size alignment (for security) */
        u8                      header_size;    /* rxrpc + security header size */
        u8                      security_size;  /* security header size */
@@ -337,37 +397,21 @@ struct rxrpc_connection {
  */
 enum rxrpc_call_flag {
        RXRPC_CALL_RELEASED,            /* call has been released - no more message to userspace */
-       RXRPC_CALL_TERMINAL_MSG,        /* call has given the socket its final message */
-       RXRPC_CALL_RCVD_LAST,           /* all packets received */
-       RXRPC_CALL_RUN_RTIMER,          /* Tx resend timer started */
-       RXRPC_CALL_TX_SOFT_ACK,         /* sent some soft ACKs */
-       RXRPC_CALL_PROC_BUSY,           /* the processor is busy */
-       RXRPC_CALL_INIT_ACCEPT,         /* acceptance was initiated */
        RXRPC_CALL_HAS_USERID,          /* has a user ID attached */
-       RXRPC_CALL_EXPECT_OOS,          /* expect out of sequence packets */
+       RXRPC_CALL_IS_SERVICE,          /* Call is service call */
+       RXRPC_CALL_EXPOSED,             /* The call was exposed to the world */
+       RXRPC_CALL_RX_LAST,             /* Received the last packet (at rxtx_top) */
+       RXRPC_CALL_TX_LAST,             /* Last packet in Tx buffer (at rxtx_top) */
 };
 
 /*
  * Events that can be raised on a call.
  */
 enum rxrpc_call_event {
-       RXRPC_CALL_EV_RCVD_ACKALL,      /* ACKALL or reply received */
-       RXRPC_CALL_EV_RCVD_BUSY,        /* busy packet received */
-       RXRPC_CALL_EV_RCVD_ABORT,       /* abort packet received */
-       RXRPC_CALL_EV_RCVD_ERROR,       /* network error received */
-       RXRPC_CALL_EV_ACK_FINAL,        /* need to generate final ACK (and release call) */
        RXRPC_CALL_EV_ACK,              /* need to generate ACK */
-       RXRPC_CALL_EV_REJECT_BUSY,      /* need to generate busy message */
        RXRPC_CALL_EV_ABORT,            /* need to generate abort */
-       RXRPC_CALL_EV_CONN_ABORT,       /* local connection abort generated */
-       RXRPC_CALL_EV_RESEND_TIMER,     /* Tx resend timer expired */
+       RXRPC_CALL_EV_TIMER,            /* Timer expired */
        RXRPC_CALL_EV_RESEND,           /* Tx resend required */
-       RXRPC_CALL_EV_DRAIN_RX_OOS,     /* drain the Rx out of sequence queue */
-       RXRPC_CALL_EV_LIFE_TIMER,       /* call's lifetimer ran out */
-       RXRPC_CALL_EV_ACCEPTED,         /* incoming call accepted by userspace app */
-       RXRPC_CALL_EV_SECURED,          /* incoming call's connection is now secure */
-       RXRPC_CALL_EV_POST_ACCEPT,      /* need to post an "accept?" message to the app */
-       RXRPC_CALL_EV_RELEASE,          /* need to release the call's resources */
 };
 
 /*
@@ -379,20 +423,27 @@ enum rxrpc_call_state {
        RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
        RXRPC_CALL_CLIENT_AWAIT_REPLY,  /* - client awaiting reply */
        RXRPC_CALL_CLIENT_RECV_REPLY,   /* - client receiving reply phase */
-       RXRPC_CALL_CLIENT_FINAL_ACK,    /* - client sending final ACK phase */
+       RXRPC_CALL_SERVER_PREALLOC,     /* - service preallocation */
        RXRPC_CALL_SERVER_SECURING,     /* - server securing request connection */
        RXRPC_CALL_SERVER_ACCEPTING,    /* - server accepting request */
        RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
        RXRPC_CALL_SERVER_ACK_REQUEST,  /* - server pending ACK of request */
        RXRPC_CALL_SERVER_SEND_REPLY,   /* - server sending reply */
        RXRPC_CALL_SERVER_AWAIT_ACK,    /* - server awaiting final ACK */
-       RXRPC_CALL_COMPLETE,            /* - call completed */
-       RXRPC_CALL_SERVER_BUSY,         /* - call rejected by busy server */
+       RXRPC_CALL_COMPLETE,            /* - call complete */
+       NR__RXRPC_CALL_STATES
+};
+
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+       RXRPC_CALL_SUCCEEDED,           /* - Normal termination */
        RXRPC_CALL_REMOTELY_ABORTED,    /* - call aborted by peer */
        RXRPC_CALL_LOCALLY_ABORTED,     /* - call aborted locally on error or close */
+       RXRPC_CALL_LOCAL_ERROR,         /* - call failed due to local error */
        RXRPC_CALL_NETWORK_ERROR,       /* - call terminated by network error */
-       RXRPC_CALL_DEAD,                /* - call is dead */
-       NR__RXRPC_CALL_STATES
+       NR__RXRPC_CALL_COMPLETIONS
 };
 
 /*
@@ -402,87 +453,105 @@ enum rxrpc_call_state {
 struct rxrpc_call {
        struct rcu_head         rcu;
        struct rxrpc_connection *conn;          /* connection carrying call */
-       struct rxrpc_sock       *socket;        /* socket responsible */
-       struct timer_list       lifetimer;      /* lifetime remaining on call */
-       struct timer_list       deadspan;       /* reap timer for re-ACK'ing, etc  */
-       struct timer_list       ack_timer;      /* ACK generation timer */
-       struct timer_list       resend_timer;   /* Tx resend timer */
-       struct work_struct      destroyer;      /* call destroyer */
-       struct work_struct      processor;      /* packet processor and ACK generator */
+       struct rxrpc_peer       *peer;          /* Peer record for remote address */
+       struct rxrpc_sock __rcu *socket;        /* socket responsible */
+       unsigned long           ack_at;         /* When deferred ACK needs to happen */
+       unsigned long           resend_at;      /* When next resend needs to happen */
+       unsigned long           expire_at;      /* When the call times out */
+       struct timer_list       timer;          /* Combined event timer */
+       struct work_struct      processor;      /* Event processor */
+       rxrpc_notify_rx_t       notify_rx;      /* kernel service Rx notification function */
        struct list_head        link;           /* link in master call list */
+       struct list_head        chan_wait_link; /* Link in conn->waiting_calls */
        struct hlist_node       error_link;     /* link in error distribution list */
-       struct list_head        accept_link;    /* calls awaiting acceptance */
-       struct rb_node          sock_node;      /* node in socket call tree */
-       struct sk_buff_head     rx_queue;       /* received packets */
-       struct sk_buff_head     rx_oos_queue;   /* packets received out of sequence */
+       struct list_head        accept_link;    /* Link in rx->to_be_accepted */
+       struct list_head        recvmsg_link;   /* Link in rx->recvmsg_q */
+       struct list_head        sock_link;      /* Link in rx->sock_calls */
+       struct rb_node          sock_node;      /* Node in rx->calls */
        struct sk_buff          *tx_pending;    /* Tx socket buffer being filled */
-       wait_queue_head_t       tx_waitq;       /* wait for Tx window space to become available */
+       wait_queue_head_t       waitq;          /* Wait queue for channel or Tx */
        __be32                  crypto_buf[2];  /* Temporary packet crypto buffer */
        unsigned long           user_call_ID;   /* user-defined call ID */
-       unsigned long           creation_jif;   /* time of call creation */
        unsigned long           flags;
        unsigned long           events;
        spinlock_t              lock;
        rwlock_t                state_lock;     /* lock for state transition */
-       atomic_t                usage;
-       atomic_t                skb_count;      /* Outstanding packets on this call */
-       atomic_t                sequence;       /* Tx data packet sequence counter */
-       u32                     local_abort;    /* local abort code */
-       u32                     remote_abort;   /* remote abort code */
-       int                     error_report;   /* Network error (ICMP/local transport) */
+       u32                     abort_code;     /* Local/remote abort code */
        int                     error;          /* Local error incurred */
-       enum rxrpc_call_state   state : 8;      /* current state of call */
+       enum rxrpc_call_state   state;          /* current state of call */
+       enum rxrpc_call_completion completion;  /* Call completion condition */
+       atomic_t                usage;
+       u16                     service_id;     /* service ID */
+       u8                      security_ix;    /* Security type */
+       u32                     call_id;        /* call ID on connection  */
+       u32                     cid;            /* connection ID plus channel index */
        int                     debug_id;       /* debug ID for printks */
-       u8                      channel;        /* connection channel occupied by this call */
 
-       /* transmission-phase ACK management */
-       u8                      acks_head;      /* offset into window of first entry */
-       u8                      acks_tail;      /* offset into window of last entry */
-       u8                      acks_winsz;     /* size of un-ACK'd window */
-       u8                      acks_unacked;   /* lowest unacked packet in last ACK received */
-       int                     acks_latest;    /* serial number of latest ACK received */
-       rxrpc_seq_t             acks_hard;      /* highest definitively ACK'd msg seq */
-       unsigned long           *acks_window;   /* sent packet window
-                                                * - elements are pointers with LSB set if ACK'd
+       /* Rx/Tx circular buffer, depending on phase.
+        *
+        * In the Rx phase, packets are annotated with 0 or the number of the
+        * segment of a jumbo packet each buffer refers to.  There can be up to
+        * 47 segments in a maximum-size UDP packet.
+        *
+        * In the Tx phase, packets are annotated with which buffers have been
+        * acked.
+        */
+#define RXRPC_RXTX_BUFF_SIZE   64
+#define RXRPC_RXTX_BUFF_MASK   (RXRPC_RXTX_BUFF_SIZE - 1)
+#define RXRPC_INIT_RX_WINDOW_SIZE 32
+       struct sk_buff          **rxtx_buffer;
+       u8                      *rxtx_annotations;
+#define RXRPC_TX_ANNO_ACK      0
+#define RXRPC_TX_ANNO_UNACK    1
+#define RXRPC_TX_ANNO_NAK      2
+#define RXRPC_TX_ANNO_RETRANS  3
+#define RXRPC_RX_ANNO_JUMBO    0x3f            /* Jumbo subpacket number + 1 if not zero */
+#define RXRPC_RX_ANNO_JLAST    0x40            /* Set if last element of a jumbo packet */
+#define RXRPC_RX_ANNO_VERIFIED 0x80            /* Set if verified and decrypted */
+       rxrpc_seq_t             tx_hard_ack;    /* Dead slot in buffer; the first transmitted but
+                                                * not hard-ACK'd packet follows this.
+                                                */
+       rxrpc_seq_t             tx_top;         /* Highest Tx slot allocated. */
+       rxrpc_seq_t             rx_hard_ack;    /* Dead slot in buffer; the first received but not
+                                                * consumed packet follows this.
                                                 */
+       rxrpc_seq_t             rx_top;         /* Highest Rx slot allocated. */
+       rxrpc_seq_t             rx_expect_next; /* Expected next packet sequence number */
+       u8                      rx_winsize;     /* Size of Rx window */
+       u8                      tx_winsize;     /* Maximum size of Tx window */
+       u8                      nr_jumbo_bad;   /* Number of jumbo dups/exceeds-windows */
 
        /* receive-phase ACK management */
-       rxrpc_seq_t             rx_data_expect; /* next data seq ID expected to be received */
-       rxrpc_seq_t             rx_data_post;   /* next data seq ID expected to be posted */
-       rxrpc_seq_t             rx_data_recv;   /* last data seq ID encountered by recvmsg */
-       rxrpc_seq_t             rx_data_eaten;  /* last data seq ID consumed by recvmsg */
-       rxrpc_seq_t             rx_first_oos;   /* first packet in rx_oos_queue (or 0) */
-       rxrpc_seq_t             ackr_win_top;   /* top of ACK window (rx_data_eaten is bottom) */
-       rxrpc_seq_t             ackr_prev_seq;  /* previous sequence number received */
        u8                      ackr_reason;    /* reason to ACK */
+       u16                     ackr_skew;      /* skew on packet being ACK'd */
        rxrpc_serial_t          ackr_serial;    /* serial of packet being ACK'd */
-       atomic_t                ackr_not_idle;  /* number of packets in Rx queue */
+       rxrpc_seq_t             ackr_prev_seq;  /* previous sequence number received */
+       unsigned short          rx_pkt_offset;  /* Current recvmsg packet offset */
+       unsigned short          rx_pkt_len;     /* Current recvmsg packet len */
 
-       /* received packet records, 1 bit per record */
-#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
-       unsigned long           ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+       /* transmission-phase ACK management */
+       rxrpc_serial_t          acks_latest;    /* serial number of latest ACK received */
+};
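
Since RXRPC_RXTX_BUFF_SIZE is a power of two, a sequence number maps to its ring slot with a single mask; an illustrative lookup using the constants above (the helper itself is not from this patch):

/* Illustrative only: find the annotation for a given sequence number. */
static u8 rxtx_annotation(const struct rxrpc_call *call, rxrpc_seq_t seq)
{
	int ix = seq & RXRPC_RXTX_BUFF_MASK;    /* 64-slot ring */

	return call->rxtx_annotations[ix];      /* e.g. RXRPC_TX_ANNO_ACK */
}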
 
-       u8                      in_clientflag;  /* Copy of conn->in_clientflag */
-       struct rxrpc_local      *local;         /* Local endpoint. */
-       u32                     call_id;        /* call ID on connection  */
-       u32                     cid;            /* connection ID plus channel index */
-       u32                     epoch;          /* epoch of this connection */
-       u16                     service_id;     /* service ID */
+enum rxrpc_call_trace {
+       rxrpc_call_new_client,
+       rxrpc_call_new_service,
+       rxrpc_call_queued,
+       rxrpc_call_queued_ref,
+       rxrpc_call_seen,
+       rxrpc_call_got,
+       rxrpc_call_got_userid,
+       rxrpc_call_got_kernel,
+       rxrpc_call_put,
+       rxrpc_call_put_userid,
+       rxrpc_call_put_kernel,
+       rxrpc_call_put_noqueue,
+       rxrpc_call__nr_trace
 };
 
-/*
- * locally abort an RxRPC call
- */
-static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
-       write_lock_bh(&call->state_lock);
-       if (call->state < RXRPC_CALL_COMPLETE) {
-               call->local_abort = abort_code;
-               call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-       }
-       write_unlock_bh(&call->state_lock);
-}
+extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4];
+
+#include <trace/events/rxrpc.h>
 
 /*
  * af_rxrpc.c
@@ -495,70 +564,171 @@ extern struct workqueue_struct *rxrpc_workqueue;
 /*
  * call_accept.c
  */
+int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
+void rxrpc_discard_prealloc(struct rxrpc_sock *);
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
+                                          struct rxrpc_connection *,
+                                          struct sk_buff *);
 void rxrpc_accept_incoming_calls(struct rxrpc_local *);
-struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long,
+                                    rxrpc_notify_rx_t);
 int rxrpc_reject_call(struct rxrpc_sock *);
 
 /*
  * call_event.c
  */
-void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool);
 void rxrpc_process_call(struct work_struct *);
 
 /*
  * call_object.c
  */
+extern const char *const rxrpc_call_states[];
+extern const char *const rxrpc_call_completions[];
 extern unsigned int rxrpc_max_call_lifetime;
-extern unsigned int rxrpc_dead_call_expiry;
 extern struct kmem_cache *rxrpc_call_jar;
 extern struct list_head rxrpc_calls;
 extern rwlock_t rxrpc_call_lock;
 
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_alloc_call(gfp_t);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
                                         struct rxrpc_conn_parameters *,
                                         struct sockaddr_rxrpc *,
                                         unsigned long, gfp_t);
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
-                                      struct rxrpc_connection *,
-                                      struct sk_buff *);
-void rxrpc_release_call(struct rxrpc_call *);
+void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
+                        struct sk_buff *);
+void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-void __rxrpc_put_call(struct rxrpc_call *);
+bool __rxrpc_queue_call(struct rxrpc_call *);
+bool rxrpc_queue_call(struct rxrpc_call *);
+void rxrpc_see_call(struct rxrpc_call *);
+void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_cleanup_call(struct rxrpc_call *);
 void __exit rxrpc_destroy_all_calls(void);
 
+static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
+{
+       return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags);
+}
+
+static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
+{
+       return !rxrpc_is_service_call(call);
+}
+
+/*
+ * Transition a call to the complete state.
+ */
+static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
+                                              enum rxrpc_call_completion compl,
+                                              u32 abort_code,
+                                              int error)
+{
+       if (call->state < RXRPC_CALL_COMPLETE) {
+               call->abort_code = abort_code;
+               call->error = error;
+               call->completion = compl;
+               call->state = RXRPC_CALL_COMPLETE;
+               return true;
+       }
+       return false;
+}
+
+static inline bool rxrpc_set_call_completion(struct rxrpc_call *call,
+                                            enum rxrpc_call_completion compl,
+                                            u32 abort_code,
+                                            int error)
+{
+       bool ret;
+
+       write_lock_bh(&call->state_lock);
+       ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
+       write_unlock_bh(&call->state_lock);
+       return ret;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+static inline bool __rxrpc_call_completed(struct rxrpc_call *call)
+{
+       return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+static inline bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+       bool ret;
+
+       write_lock_bh(&call->state_lock);
+       ret = __rxrpc_call_completed(call);
+       write_unlock_bh(&call->state_lock);
+       return ret;
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+                                     rxrpc_seq_t seq,
+                                     u32 abort_code, int error)
+{
+       trace_rxrpc_abort(why, call->cid, call->call_id, seq,
+                         abort_code, error);
+       return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+                                          abort_code, error);
+}
+
+static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+                                   rxrpc_seq_t seq, u32 abort_code, int error)
+{
+       bool ret;
+
+       write_lock_bh(&call->state_lock);
+       ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
+       write_unlock_bh(&call->state_lock);
+       return ret;
+}
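
One plausible caller pattern, matching how the event machinery in this series is wired up: complete the call as locally aborted, then kick the processor to transmit the ABORT. The "why" tag, abort code and error value here are illustrative:

static void example_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq)
{
	if (rxrpc_abort_call("BAD", call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
		rxrpc_queue_call(call);
	}
}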
+
 /*
  * conn_client.c
  */
+extern unsigned int rxrpc_max_client_connections;
+extern unsigned int rxrpc_reap_client_connections;
+extern unsigned int rxrpc_conn_idle_client_expiry;
+extern unsigned int rxrpc_conn_idle_client_fast_expiry;
 extern struct idr rxrpc_client_conn_ids;
 
 void rxrpc_destroy_client_conn_ids(void);
 int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
                       struct sockaddr_rxrpc *, gfp_t);
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
+void rxrpc_expose_client_call(struct rxrpc_call *);
+void rxrpc_disconnect_client_call(struct rxrpc_call *);
+void rxrpc_put_client_conn(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_client_connections(void);
 
 /*
  * conn_event.c
  */
 void rxrpc_process_connection(struct work_struct *);
-void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
-void rxrpc_reject_packets(struct rxrpc_local *);
 
 /*
  * conn_object.c
  */
 extern unsigned int rxrpc_connection_expiry;
 extern struct list_head rxrpc_connections;
+extern struct list_head rxrpc_connection_proc_list;
 extern rwlock_t rxrpc_connection_lock;
 
 int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
 struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
                                                   struct sk_buff *);
-void __rxrpc_disconnect_call(struct rxrpc_call *);
+void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
 void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_put_connection(struct rxrpc_connection *);
+void rxrpc_kill_connection(struct rxrpc_connection *);
+void __rxrpc_put_connection(struct rxrpc_connection *);
 void __exit rxrpc_destroy_all_connections(void);
 
 static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
@@ -582,6 +752,20 @@ struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *con
        return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
 }
 
+static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+       if (!conn)
+               return;
+
+       if (rxrpc_conn_is_client(conn)) {
+               if (atomic_dec_and_test(&conn->usage))
+                       rxrpc_put_client_conn(conn);
+       } else {
+               if (atomic_dec_return(&conn->usage) == 1)
+                       __rxrpc_put_connection(conn);
+       }
+}
+
 static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
 {
        if (!rxrpc_get_connection_maybe(conn))
@@ -596,17 +780,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
  */
 struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
                                                     struct sk_buff *);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
-                                                  struct sockaddr_rxrpc *,
-                                                  struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
 void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 
 /*
  * input.c
  */
 void rxrpc_data_ready(struct sock *);
-int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
-void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
 
 /*
  * insecure.c
@@ -668,6 +849,7 @@ extern unsigned int rxrpc_idle_ack_delay;
 extern unsigned int rxrpc_rx_window_size;
 extern unsigned int rxrpc_rx_mtu;
 extern unsigned int rxrpc_rx_jumbo_max;
+extern unsigned int rxrpc_resend_timeout;
 
 extern const char *const rxrpc_pkts[];
 extern const s8 rxrpc_ack_priority[];
@@ -677,10 +859,9 @@ extern const char *rxrpc_acks(u8 reason);
 /*
  * output.c
  */
-extern unsigned int rxrpc_resend_timeout;
-
+int rxrpc_send_call_packet(struct rxrpc_call *, u8);
 int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *);
-int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+void rxrpc_reject_packets(struct rxrpc_local *);
 
 /*
  * peer_event.c
@@ -696,10 +877,13 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
 struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
                                     struct sockaddr_rxrpc *, gfp_t);
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
+                                             struct rxrpc_peer *);
 
-static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
+static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
 {
        atomic_inc(&peer->usage);
+       return peer;
 }
 
 static inline
@@ -718,14 +902,13 @@ static inline void rxrpc_put_peer(struct rxrpc_peer *peer)
 /*
  * proc.c
  */
-extern const char *const rxrpc_call_states[];
 extern const struct file_operations rxrpc_call_seq_fops;
 extern const struct file_operations rxrpc_connection_seq_fops;
 
 /*
  * recvmsg.c
  */
-void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+void rxrpc_notify_socket(struct rxrpc_call *);
 int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
 
 /*
@@ -743,10 +926,21 @@ void rxrpc_exit_security(void);
 int rxrpc_init_client_conn_security(struct rxrpc_connection *);
 int rxrpc_init_server_conn_security(struct rxrpc_connection *);
 
+/*
+ * sendmsg.c
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+
 /*
  * skbuff.c
  */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_new_skb(struct sk_buff *);
+void rxrpc_see_skb(struct sk_buff *);
+void rxrpc_get_skb(struct sk_buff *);
+void rxrpc_free_skb(struct sk_buff *);
+void rxrpc_purge_queue(struct sk_buff_head *);
 
 /*
  * sysctl.c
@@ -764,6 +958,23 @@ static inline void rxrpc_sysctl_exit(void) {}
  */
 int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 
+static inline bool before(u32 seq1, u32 seq2)
+{
+       return (s32)(seq1 - seq2) < 0;
+}
+static inline bool before_eq(u32 seq1, u32 seq2)
+{
+       return (s32)(seq1 - seq2) <= 0;
+}
+static inline bool after(u32 seq1, u32 seq2)
+{
+       return (s32)(seq1 - seq2) > 0;
+}
+static inline bool after_eq(u32 seq1, u32 seq2)
+{
+       return (s32)(seq1 - seq2) >= 0;
+}
+
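The four helpers just added use the classic serial-number trick (cf. RFC 1982 and TCP's before()/after()): subtract in u32 and reinterpret the difference as s32, so the comparison stays correct across the 2^32 wraparound whenever the true distance between the two sequence numbers is under 2^31. A minimal userspace self-test, not part of the patch, illustrating the wrapped case:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static inline bool seq_before(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) < 0;
}

int main(void)
{
        /* Ordinary case: 5 precedes 10. */
        assert(seq_before(5, 10));
        /* Wrapped case: 0xfffffffe precedes 0x00000001, because the
         * u32 difference 0xfffffffd reinterprets as a small negative
         * s32 (-3), even though 0xfffffffe > 1 numerically.
         */
        assert(seq_before(0xfffffffeU, 0x00000001U));
        /* A naive unsigned comparison gets the wrapped case backwards. */
        assert(!(0xfffffffeU < 0x00000001U));
        return 0;
}
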
 /*
  * debug tracing
  */
@@ -846,11 +1057,12 @@ do {                                                             \
 
 #define ASSERTCMP(X, OP, Y)                                            \
 do {                                                                   \
-       unsigned long _x = (unsigned long)(X);                          \
-       unsigned long _y = (unsigned long)(Y);                          \
+       __typeof__(X) _x = (X);                                         \
+       __typeof__(Y) _y = (__typeof__(X))(Y);                          \
        if (unlikely(!(_x OP _y))) {                                    \
-               pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n",                        \
-                      _x, _x, #OP, _y, _y);                            \
+               pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+                      (unsigned long)_x, (unsigned long)_x, #OP,       \
+                      (unsigned long)_y, (unsigned long)_y);           \
                BUG();                                                  \
        }                                                               \
 } while (0)
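
The ASSERTCMP/ASSERTIFCMP rework above replaces the unsigned long temporaries with __typeof__ ones: the comparison now happens in the operands' own type, and the cast to unsigned long is deferred to the pr_err() arguments, so signed operands are no longer mangled before the test. A small illustration, assuming GCC's __typeof__ extension, of the failure mode the old macros had:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        int x = -1, y = 0;

        /* Old behaviour: cast both sides to unsigned long first.
         * (unsigned long)-1 is ULONG_MAX, so the comparison inverts.
         */
        assert(!((unsigned long)x < (unsigned long)y));

        /* New behaviour: compare in the operands' own type; cast to
         * unsigned long only when printing the values.
         */
        __typeof__(x) _x = x;
        __typeof__(y) _y = (__typeof__(x))y;
        assert(_x < _y);        /* signed: -1 < 0, as intended */

        printf("compared %lu(0x%lx)\n",
               (unsigned long)_x, (unsigned long)_x);
        return 0;
}
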
@@ -865,11 +1077,12 @@ do {                                                             \
 
 #define ASSERTIFCMP(C, X, OP, Y)                                       \
 do {                                                                   \
-       unsigned long _x = (unsigned long)(X);                          \
-       unsigned long _y = (unsigned long)(Y);                          \
+       __typeof__(X) _x = (X);                                         \
+       __typeof__(Y) _y = (__typeof__(X))(Y);                          \
        if (unlikely((C) && !(_x OP _y))) {                             \
                pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
-                      _x, _x, #OP, _y, _y);                            \
+                      (unsigned long)_x, (unsigned long)_x, #OP,       \
+                      (unsigned long)_y, (unsigned long)_y);           \
                BUG();                                                  \
        }                                                               \
 } while (0)
@@ -893,54 +1106,3 @@ do {                                              \
 } while (0)
 
 #endif /* __KDEBUGALL */
-
-/*
- * socket buffer accounting / leak finding
- */
-static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
-{
-       //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
-       //atomic_inc(&rxrpc_n_skbs);
-}
-
-#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
-
-static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
-{
-       //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
-       //atomic_dec(&rxrpc_n_skbs);
-}
-
-#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
-
-static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
-{
-       if (skb) {
-               CHECK_SLAB_OKAY(&skb->users);
-               //_net("free skb %p %s [%d]",
-               //     skb, fn, atomic_read(&rxrpc_n_skbs));
-               //atomic_dec(&rxrpc_n_skbs);
-               kfree_skb(skb);
-       }
-}
-
-#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
-
-static inline void rxrpc_purge_queue(struct sk_buff_head *list)
-{
-       struct sk_buff *skb;
-       while ((skb = skb_dequeue((list))) != NULL)
-               rxrpc_free_skb(skb);
-}
-
-#define rxrpc_get_call(CALL)                           \
-do {                                                   \
-       CHECK_SLAB_OKAY(&(CALL)->usage);                \
-       if (atomic_inc_return(&(CALL)->usage) == 1)     \
-               BUG();                                  \
-} while (0)
-
-#define rxrpc_put_call(CALL)                           \
-do {                                                   \
-       __rxrpc_put_call(CALL);                         \
-} while (0)
index 9bae21e..26c293e 100644 (file)
 #include <linux/in6.h>
 #include <linux/icmp.h>
 #include <linux/gfp.h>
+#include <linux/circ_buf.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include "ar-internal.h"
 
 /*
- * generate a connection-level abort
+ * Preallocate a single service call, connection and peer and, if possible,
+ * give them a user ID and attach the user's side of the ID to them.
  */
-static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
-                     struct rxrpc_wire_header *whdr)
+static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+                                     struct rxrpc_backlog *b,
+                                     rxrpc_notify_rx_t notify_rx,
+                                     rxrpc_user_attach_call_t user_attach_call,
+                                     unsigned long user_call_ID, gfp_t gfp)
 {
-       struct msghdr msg;
-       struct kvec iov[1];
-       size_t len;
-       int ret;
+       const void *here = __builtin_return_address(0);
+       struct rxrpc_call *call;
+       int max, tmp;
+       unsigned int size = RXRPC_BACKLOG_MAX;
+       unsigned int head, tail, call_head, call_tail;
+
+       max = rx->sk.sk_max_ack_backlog;
+       tmp = rx->sk.sk_ack_backlog;
+       if (tmp >= max) {
+               _leave(" = -ENOBUFS [full %u]", max);
+               return -ENOBUFS;
+       }
+       max -= tmp;
+
+       /* We don't need more conns and peers than we have calls, but on the
+        * other hand, we shouldn't ever use more peers than conns or conns
+        * than calls.
+        */
+       call_head = b->call_backlog_head;
+       call_tail = READ_ONCE(b->call_backlog_tail);
+       tmp = CIRC_CNT(call_head, call_tail, size);
+       if (tmp >= max) {
+               _leave(" = -ENOBUFS [enough %u]", tmp);
+               return -ENOBUFS;
+       }
+       max = tmp + 1;
+
+       head = b->peer_backlog_head;
+       tail = READ_ONCE(b->peer_backlog_tail);
+       if (CIRC_CNT(head, tail, size) < max) {
+               struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp);
+               if (!peer)
+                       return -ENOMEM;
+               b->peer_backlog[head] = peer;
+               smp_store_release(&b->peer_backlog_head,
+                                 (head + 1) & (size - 1));
+       }
+
+       head = b->conn_backlog_head;
+       tail = READ_ONCE(b->conn_backlog_tail);
+       if (CIRC_CNT(head, tail, size) < max) {
+               struct rxrpc_connection *conn;
+
+               conn = rxrpc_prealloc_service_connection(gfp);
+               if (!conn)
+                       return -ENOMEM;
+               b->conn_backlog[head] = conn;
+               smp_store_release(&b->conn_backlog_head,
+                                 (head + 1) & (size - 1));
+       }
 
-       _enter("%d,,", local->debug_id);
+       /* Now it gets complicated, because calls get registered with the
+        * socket here, particularly if a user ID is preassigned by the user.
+        */
+       call = rxrpc_alloc_call(gfp);
+       if (!call)
+               return -ENOMEM;
+       call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
+       call->state = RXRPC_CALL_SERVER_PREALLOC;
 
-       whdr->type      = RXRPC_PACKET_TYPE_BUSY;
-       whdr->serial    = htonl(1);
+       trace_rxrpc_call(call, rxrpc_call_new_service,
+                        atomic_read(&call->usage),
+                        here, (const void *)user_call_ID);
 
-       msg.msg_name    = &srx->transport.sin;
-       msg.msg_namelen = sizeof(srx->transport.sin);
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_flags   = 0;
+       write_lock(&rx->call_lock);
+       if (user_attach_call) {
+               struct rxrpc_call *xcall;
+               struct rb_node *parent, **pp;
+
+               /* Check the user ID isn't already in use */
+               pp = &rx->calls.rb_node;
+               parent = NULL;
+               while (*pp) {
+                       parent = *pp;
+                       xcall = rb_entry(parent, struct rxrpc_call, sock_node);
+                       if (user_call_ID < xcall->user_call_ID)
+                               pp = &(*pp)->rb_left;
+                       else if (user_call_ID > xcall->user_call_ID)
+                               pp = &(*pp)->rb_right;
+                       else
+                               goto id_in_use;
+               }
 
-       iov[0].iov_base = whdr;
-       iov[0].iov_len  = sizeof(*whdr);
+               call->user_call_ID = user_call_ID;
+               call->notify_rx = notify_rx;
+               rxrpc_get_call(call, rxrpc_call_got_kernel);
+               user_attach_call(call, user_call_ID);
+               rxrpc_get_call(call, rxrpc_call_got_userid);
+               rb_link_node(&call->sock_node, parent, pp);
+               rb_insert_color(&call->sock_node, &rx->calls);
+               set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+       }
 
-       len = iov[0].iov_len;
+       list_add(&call->sock_link, &rx->sock_calls);
 
-       _proto("Tx BUSY %%1");
+       write_unlock(&rx->call_lock);
 
-       ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
-       if (ret < 0) {
-               _leave(" = -EAGAIN [sendmsg failed: %d]", ret);
-               return -EAGAIN;
-       }
+       write_lock(&rxrpc_call_lock);
+       list_add_tail(&call->link, &rxrpc_calls);
+       write_unlock(&rxrpc_call_lock);
 
-       _leave(" = 0");
+       b->call_backlog[call_head] = call;
+       smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
+       _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID);
        return 0;
+
+id_in_use:
+       write_unlock(&rx->call_lock);
+       rxrpc_cleanup_call(call);
+       _leave(" = -EBADSLT");
+       return -EBADSLT;
 }
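
The duplicate-ID check above is the kernel's standard open-coded rb-tree insertion: walk down from the root comparing the new key against each existing node (which is why the comparison must be against xcall, the node under inspection, rather than the freshly allocated call), remember the final parent and link slot, then splice with rb_link_node() and rebalance with rb_insert_color(). A minimal sketch of the same walk with a hypothetical node type:

#include <linux/rbtree.h>
#include <linux/types.h>

/* Hypothetical node type; the shape of the walk matches the
 * user_call_ID insertion above.
 */
struct my_node {
        struct rb_node rb;
        unsigned long key;
};

static bool my_insert(struct rb_root *root, struct my_node *new)
{
        struct rb_node **pp = &root->rb_node, *parent = NULL;

        while (*pp) {
                struct my_node *x = rb_entry(*pp, struct my_node, rb);

                parent = *pp;
                if (new->key < x->key)
                        pp = &(*pp)->rb_left;   /* descend left */
                else if (new->key > x->key)
                        pp = &(*pp)->rb_right;  /* descend right */
                else
                        return false;           /* key already in use */
        }

        rb_link_node(&new->rb, parent, pp);     /* splice in as a leaf */
        rb_insert_color(&new->rb, root);        /* then rebalance/recolour */
        return true;
}
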
 
 /*
- * accept an incoming call that needs peer, transport and/or connection setting
- * up
+ * Preallocate sufficient service connections, calls and peers to cover the
+ * entire backlog of a socket.  When a new call comes in, if we don't have
+ * enough of each available, the call gets rejected as busy or ignored.
+ *
+ * The backlog is replenished when a connection is accepted or rejected.
  */
-static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
-                                     struct rxrpc_sock *rx,
-                                     struct sk_buff *skb,
-                                     struct sockaddr_rxrpc *srx)
+int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
 {
-       struct rxrpc_connection *conn;
-       struct rxrpc_skb_priv *sp, *nsp;
-       struct rxrpc_call *call;
-       struct sk_buff *notification;
-       int ret;
+       struct rxrpc_backlog *b = rx->backlog;
 
-       _enter("");
+       if (!b) {
+               b = kzalloc(sizeof(struct rxrpc_backlog), gfp);
+               if (!b)
+                       return -ENOMEM;
+               rx->backlog = b;
+       }
+
+       if (rx->discard_new_call)
+               return 0;
+
+       while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0)
+               ;
 
-       sp = rxrpc_skb(skb);
+       return 0;
+}
 
-       /* get a notification message to send to the server app */
-       notification = alloc_skb(0, GFP_NOFS);
-       if (!notification) {
-               _debug("no memory");
-               ret = -ENOMEM;
-               goto error_nofree;
+/*
+ * Discard the preallocation on a service.
+ */
+void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
+{
+       struct rxrpc_backlog *b = rx->backlog;
+       unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
+
+       if (!b)
+               return;
+       rx->backlog = NULL;
+
+       /* Make sure that there aren't any incoming calls in progress before we
+        * clear the preallocation buffers.
+        */
+       spin_lock_bh(&rx->incoming_lock);
+       spin_unlock_bh(&rx->incoming_lock);
+
+       head = b->peer_backlog_head;
+       tail = b->peer_backlog_tail;
+       while (CIRC_CNT(head, tail, size) > 0) {
+               struct rxrpc_peer *peer = b->peer_backlog[tail];
+               kfree(peer);
+               tail = (tail + 1) & (size - 1);
        }
-       rxrpc_new_skb(notification);
-       notification->mark = RXRPC_SKB_MARK_NEW_CALL;
-
-       conn = rxrpc_incoming_connection(local, srx, skb);
-       if (IS_ERR(conn)) {
-               _debug("no conn");
-               ret = PTR_ERR(conn);
-               goto error;
+
+       head = b->conn_backlog_head;
+       tail = b->conn_backlog_tail;
+       while (CIRC_CNT(head, tail, size) > 0) {
+               struct rxrpc_connection *conn = b->conn_backlog[tail];
+               write_lock(&rxrpc_connection_lock);
+               list_del(&conn->link);
+               list_del(&conn->proc_link);
+               write_unlock(&rxrpc_connection_lock);
+               kfree(conn);
+               tail = (tail + 1) & (size - 1);
        }
 
-       call = rxrpc_incoming_call(rx, conn, skb);
-       rxrpc_put_connection(conn);
-       if (IS_ERR(call)) {
-               _debug("no call");
-               ret = PTR_ERR(call);
-               goto error;
+       head = b->call_backlog_head;
+       tail = b->call_backlog_tail;
+       while (CIRC_CNT(head, tail, size) > 0) {
+               struct rxrpc_call *call = b->call_backlog[tail];
+               if (rx->discard_new_call) {
+                       _debug("discard %lx", call->user_call_ID);
+                       rx->discard_new_call(call, call->user_call_ID);
+                       rxrpc_put_call(call, rxrpc_call_put_kernel);
+               }
+               rxrpc_call_completed(call);
+               rxrpc_release_call(rx, call);
+               rxrpc_put_call(call, rxrpc_call_put);
+               tail = (tail + 1) & (size - 1);
        }
 
-       /* attach the call to the socket */
-       read_lock_bh(&local->services_lock);
-       if (rx->sk.sk_state == RXRPC_CLOSE)
-               goto invalid_service;
+       kfree(b);
+}
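
The back-to-back spin_lock_bh()/spin_unlock_bh() on incoming_lock above is a barrier idiom, not an oversight: the lock cannot be acquired until any rxrpc_new_incoming_call() already inside its critical section has dropped it, so once it has been taken and released, no BH-context caller can still be consuming the rings that are about to be freed. The same idiom in userspace terms, as a hedged sketch:

#include <pthread.h>

static pthread_mutex_t incoming_lock = PTHREAD_MUTEX_INITIALIZER;

/* Taking and immediately dropping the lock acts as a barrier: once
 * pthread_mutex_lock() returns, every critical section that started
 * earlier has released the lock, so its effects happen-before ours.
 */
static void wait_for_in_flight_callers(void)
{
        pthread_mutex_lock(&incoming_lock);
        pthread_mutex_unlock(&incoming_lock);
}
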
 
-       write_lock(&rx->call_lock);
-       if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
-               rxrpc_get_call(call);
-
-               spin_lock(&call->conn->state_lock);
-               if (sp->hdr.securityIndex > 0 &&
-                   call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
-                       _debug("await conn sec");
-                       list_add_tail(&call->accept_link, &rx->secureq);
-                       call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
-                       set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
-                       rxrpc_queue_conn(call->conn);
-               } else {
-                       _debug("conn ready");
-                       call->state = RXRPC_CALL_SERVER_ACCEPTING;
-                       list_add_tail(&call->accept_link, &rx->acceptq);
-                       rxrpc_get_call(call);
-                       atomic_inc(&call->skb_count);
-                       nsp = rxrpc_skb(notification);
-                       nsp->call = call;
-
-                       ASSERTCMP(atomic_read(&call->usage), >=, 3);
-
-                       _debug("notify");
-                       spin_lock(&call->lock);
-                       ret = rxrpc_queue_rcv_skb(call, notification, true,
-                                                 false);
-                       spin_unlock(&call->lock);
-                       notification = NULL;
-                       BUG_ON(ret < 0);
+/*
+ * Allocate a new incoming call from the prealloc pool, along with a connection
+ * and a peer as necessary.
+ */
+static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
+                                                   struct rxrpc_local *local,
+                                                   struct rxrpc_connection *conn,
+                                                   struct sk_buff *skb)
+{
+       struct rxrpc_backlog *b = rx->backlog;
+       struct rxrpc_peer *peer, *xpeer;
+       struct rxrpc_call *call;
+       unsigned short call_head, conn_head, peer_head;
+       unsigned short call_tail, conn_tail, peer_tail;
+       unsigned short call_count, conn_count;
+
+       /* #peers >= #conns >= #calls must hold true. */
+       call_head = smp_load_acquire(&b->call_backlog_head);
+       call_tail = b->call_backlog_tail;
+       call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX);
+       conn_head = smp_load_acquire(&b->conn_backlog_head);
+       conn_tail = b->conn_backlog_tail;
+       conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX);
+       ASSERTCMP(conn_count, >=, call_count);
+       peer_head = smp_load_acquire(&b->peer_backlog_head);
+       peer_tail = b->peer_backlog_tail;
+       ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=,
+                 conn_count);
+
+       if (call_count == 0)
+               return NULL;
+
+       if (!conn) {
+               /* No connection.  We're going to need a peer to start off
+                * with.  If one doesn't yet exist, use a spare from the
+                * preallocation set.  We dump the address into the spare in
+                * anticipation - and to save on stack space.
+                */
+               xpeer = b->peer_backlog[peer_tail];
+               if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+                       return NULL;
+
+               peer = rxrpc_lookup_incoming_peer(local, xpeer);
+               if (peer == xpeer) {
+                       b->peer_backlog[peer_tail] = NULL;
+                       smp_store_release(&b->peer_backlog_tail,
+                                         (peer_tail + 1) &
+                                         (RXRPC_BACKLOG_MAX - 1));
                }
-               spin_unlock(&call->conn->state_lock);
 
-               _debug("queued");
+               /* Now allocate and set up the connection */
+               conn = b->conn_backlog[conn_tail];
+               b->conn_backlog[conn_tail] = NULL;
+               smp_store_release(&b->conn_backlog_tail,
+                                 (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+               rxrpc_get_local(local);
+               conn->params.local = local;
+               conn->params.peer = peer;
+               rxrpc_new_incoming_connection(conn, skb);
+       } else {
+               rxrpc_get_connection(conn);
        }
-       write_unlock(&rx->call_lock);
 
-       _debug("process");
-       rxrpc_fast_process_packet(call, skb);
+       /* And now we can allocate and set up a new call */
+       call = b->call_backlog[call_tail];
+       b->call_backlog[call_tail] = NULL;
+       smp_store_release(&b->call_backlog_tail,
+                         (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
 
-       _debug("done");
-       read_unlock_bh(&local->services_lock);
-       rxrpc_free_skb(notification);
-       rxrpc_put_call(call);
-       _leave(" = 0");
-       return 0;
-
-invalid_service:
-       _debug("invalid");
-       read_unlock_bh(&local->services_lock);
-
-       read_lock_bh(&call->state_lock);
-       if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-           !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-               rxrpc_get_call(call);
-               rxrpc_queue_call(call);
-       }
-       read_unlock_bh(&call->state_lock);
-       rxrpc_put_call(call);
-       ret = -ECONNREFUSED;
-error:
-       rxrpc_free_skb(notification);
-error_nofree:
-       _leave(" = %d", ret);
-       return ret;
+       rxrpc_see_call(call);
+       call->conn = conn;
+       call->peer = rxrpc_get_peer(conn->params.peer);
+       return call;
 }
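
Both ends of the backlog rings follow the single-producer/single-consumer pattern from Documentation/circular-buffers.txt: rxrpc_service_prealloc_one() fills a slot and then publishes it with smp_store_release() on the head index, while rxrpc_alloc_incoming_call() reads the head with smp_load_acquire() before touching the slot and retires entries by releasing the tail; power-of-two masking keeps every index in range. A stripped-down userspace sketch of the same pairing, using C11 atomics as stand-ins for the kernel barrier macros and hypothetical names:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

#define RING_SIZE 32    /* must be a power of two, like RXRPC_BACKLOG_MAX */
#define CIRC_CNT(head, tail) (((head) - (tail)) & (RING_SIZE - 1))

struct ring {
        _Atomic unsigned int head;      /* only the producer advances this */
        _Atomic unsigned int tail;      /* only the consumer advances this */
        void *slot[RING_SIZE];
};

/* Producer: write the slot first, then publish it by advancing head
 * with release semantics, so a consumer that observes the new head is
 * guaranteed to see the slot contents.
 */
static int ring_produce(struct ring *r, void *item)
{
        unsigned int head = atomic_load_explicit(&r->head, memory_order_relaxed);
        unsigned int tail = atomic_load_explicit(&r->tail, memory_order_acquire);

        if (CIRC_CNT(head, tail) >= RING_SIZE - 1)
                return -1;                      /* ring full */
        r->slot[head] = item;
        atomic_store_explicit(&r->head, (head + 1) & (RING_SIZE - 1),
                              memory_order_release);
        return 0;
}

/* Consumer: load head with acquire semantics before touching the slot,
 * then retire the slot by advancing tail with release semantics.
 */
static void *ring_consume(struct ring *r)
{
        unsigned int head = atomic_load_explicit(&r->head, memory_order_acquire);
        unsigned int tail = atomic_load_explicit(&r->tail, memory_order_relaxed);
        void *item;

        if (CIRC_CNT(head, tail) == 0)
                return NULL;                    /* ring empty */
        item = r->slot[tail];
        atomic_store_explicit(&r->tail, (tail + 1) & (RING_SIZE - 1),
                              memory_order_release);
        return item;
}

int main(void)
{
        static struct ring r;
        int x = 42;

        assert(ring_produce(&r, &x) == 0);
        assert(ring_consume(&r) == &x);
        assert(ring_consume(&r) == NULL);
        printf("ring ok\n");
        return 0;
}
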
 
 /*
- * accept incoming calls that need peer, transport and/or connection setting up
- * - the packets we get are all incoming client DATA packets that have seq == 1
+ * Set up a new incoming call.  Called in BH context with the RCU read lock
+ * held.
+ *
+ * If this is for a kernel service, when we allocate the call, it will have
+ * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
+ * retainer ref obtained from the backlog buffer.  Prealloc calls for userspace
+ * services only have the ref from the backlog buffer.  We want to pass this
+ * ref to non-BH context to dispose of.
+ *
+ * If we want to report an error, we mark the skb with the packet type and
+ * abort code and return NULL.
  */
-void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
+                                          struct rxrpc_connection *conn,
+                                          struct sk_buff *skb)
 {
-       struct rxrpc_skb_priv *sp;
-       struct sockaddr_rxrpc srx;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        struct rxrpc_sock *rx;
-       struct rxrpc_wire_header whdr;
-       struct sk_buff *skb;
-       int ret;
-
-       _enter("%d", local->debug_id);
+       struct rxrpc_call *call;
 
-       skb = skb_dequeue(&local->accept_queue);
-       if (!skb) {
-               _leave("\n");
-               return;
-       }
+       _enter("");
 
-       _net("incoming call skb %p", skb);
-
-       sp = rxrpc_skb(skb);
-
-       /* Set up a response packet header in case we need it */
-       whdr.epoch      = htonl(sp->hdr.epoch);
-       whdr.cid        = htonl(sp->hdr.cid);
-       whdr.callNumber = htonl(sp->hdr.callNumber);
-       whdr.seq        = htonl(sp->hdr.seq);
-       whdr.serial     = 0;
-       whdr.flags      = 0;
-       whdr.type       = 0;
-       whdr.userStatus = 0;
-       whdr.securityIndex = sp->hdr.securityIndex;
-       whdr._rsvd      = 0;
-       whdr.serviceId  = htons(sp->hdr.serviceId);
-
-       if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
-               goto drop;
-
-       /* get the socket providing the service */
-       read_lock_bh(&local->services_lock);
-       list_for_each_entry(rx, &local->services, listen_link) {
-               if (rx->srx.srx_service == sp->hdr.serviceId &&
-                   rx->sk.sk_state != RXRPC_CLOSE)
+       /* Get the socket providing the service */
+       hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) {
+               if (rx->srx.srx_service == sp->hdr.serviceId)
                        goto found_service;
        }
-       read_unlock_bh(&local->services_lock);
-       goto invalid_service;
+
+       trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+                         RX_INVALID_OPERATION, EOPNOTSUPP);
+       skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+       skb->priority = RX_INVALID_OPERATION;
+       _leave(" = NULL [service]");
+       return NULL;
 
 found_service:
-       _debug("found service %hd", rx->srx.srx_service);
-       if (sk_acceptq_is_full(&rx->sk))
-               goto backlog_full;
-       sk_acceptq_added(&rx->sk);
-       sock_hold(&rx->sk);
-       read_unlock_bh(&local->services_lock);
-
-       ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
-       if (ret < 0)
-               sk_acceptq_removed(&rx->sk);
-       sock_put(&rx->sk);
-       switch (ret) {
-       case -ECONNRESET: /* old calls are ignored */
-       case -ECONNABORTED: /* aborted calls are reaborted or ignored */
-       case 0:
-               return;
-       case -ECONNREFUSED:
-               goto invalid_service;
-       case -EBUSY:
-               goto busy;
-       case -EKEYREJECTED:
-               goto security_mismatch;
+       spin_lock(&rx->incoming_lock);
+       if (rx->sk.sk_state == RXRPC_CLOSE) {
+               trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber,
+                                 sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
+               skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+               skb->priority = RX_INVALID_OPERATION;
+               _leave(" = NULL [close]");
+               call = NULL;
+               goto out;
+       }
+
+       call = rxrpc_alloc_incoming_call(rx, local, conn, skb);
+       if (!call) {
+               skb->mark = RXRPC_SKB_MARK_BUSY;
+               _leave(" = NULL [busy]");
+               call = NULL;
+               goto out;
+       }
+
+       /* Make the call live. */
+       rxrpc_incoming_call(rx, call, skb);
+       conn = call->conn;
+
+       if (rx->notify_new_call)
+               rx->notify_new_call(&rx->sk, call, call->user_call_ID);
+
+       spin_lock(&conn->state_lock);
+       switch (conn->state) {
+       case RXRPC_CONN_SERVICE_UNSECURED:
+               conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+               set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
+               rxrpc_queue_conn(call->conn);
+               break;
+
+       case RXRPC_CONN_SERVICE:
+               write_lock(&call->state_lock);
+               if (rx->discard_new_call)
+                       call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+               else
+                       call->state = RXRPC_CALL_SERVER_ACCEPTING;
+               write_unlock(&call->state_lock);
+               break;
+
+       case RXRPC_CONN_REMOTELY_ABORTED:
+               rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+                                         conn->remote_abort, ECONNABORTED);
+               break;
+       case RXRPC_CONN_LOCALLY_ABORTED:
+               rxrpc_abort_call("CON", call, sp->hdr.seq,
+                                conn->local_abort, ECONNABORTED);
+               break;
        default:
                BUG();
        }
+       spin_unlock(&conn->state_lock);
 
-backlog_full:
-       read_unlock_bh(&local->services_lock);
-busy:
-       rxrpc_busy(local, &srx, &whdr);
-       rxrpc_free_skb(skb);
-       return;
+       if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
+               rxrpc_notify_socket(call);
 
-drop:
-       rxrpc_free_skb(skb);
-       return;
+       /* We have to discard the prealloc queue's ref here and rely on a
+        * combination of the RCU read lock and refs held either by the socket
+        * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
+        * service to prevent the call from being deallocated too early.
+        */
+       rxrpc_put_call(call, rxrpc_call_put);
 
-invalid_service:
-       skb->priority = RX_INVALID_OPERATION;
-       rxrpc_reject_packet(local, skb);
-       return;
-
-       /* can't change connection security type mid-flow */
-security_mismatch:
-       skb->priority = RX_PROTOCOL_ERROR;
-       rxrpc_reject_packet(local, skb);
-       return;
+       _leave(" = %p{%d}", call, call->debug_id);
+out:
+       spin_unlock(&rx->incoming_lock);
+       return call;
 }
 
 /*
@@ -286,7 +421,8 @@ security_mismatch:
  * - assign the user call ID to the call at the front of the queue
  */
 struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
-                                    unsigned long user_call_ID)
+                                    unsigned long user_call_ID,
+                                    rxrpc_notify_rx_t notify_rx)
 {
        struct rxrpc_call *call;
        struct rb_node *parent, **pp;
@@ -298,12 +434,13 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
 
        write_lock(&rx->call_lock);
 
-       ret = -ENODATA;
-       if (list_empty(&rx->acceptq))
-               goto out;
+       if (list_empty(&rx->to_be_accepted)) {
+               write_unlock(&rx->call_lock);
+               _leave(" = -ENODATA [empty]");
+               return ERR_PTR(-ENODATA);
+       }
 
        /* check the user ID isn't already in use */
-       ret = -EBADSLT;
        pp = &rx->calls.rb_node;
        parent = NULL;
        while (*pp) {
@@ -315,62 +452,59 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
                else if (user_call_ID > call->user_call_ID)
                        pp = &(*pp)->rb_right;
                else
-                       goto out;
+                       goto id_in_use;
        }
 
-       /* dequeue the first call and check it's still valid */
-       call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+       /* Dequeue the first call and check it's still valid.  We gain
+        * responsibility for the queue's reference.
+        */
+       call = list_entry(rx->to_be_accepted.next,
+                         struct rxrpc_call, accept_link);
        list_del_init(&call->accept_link);
        sk_acceptq_removed(&rx->sk);
+       rxrpc_see_call(call);
 
        write_lock_bh(&call->state_lock);
        switch (call->state) {
        case RXRPC_CALL_SERVER_ACCEPTING:
                call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
                break;
-       case RXRPC_CALL_REMOTELY_ABORTED:
-       case RXRPC_CALL_LOCALLY_ABORTED:
-               ret = -ECONNABORTED;
+       case RXRPC_CALL_COMPLETE:
+               ret = call->error;
                goto out_release;
-       case RXRPC_CALL_NETWORK_ERROR:
-               ret = call->conn->error;
-               goto out_release;
-       case RXRPC_CALL_DEAD:
-               ret = -ETIME;
-               goto out_discard;
        default:
                BUG();
        }
 
        /* formalise the acceptance */
+       call->notify_rx = notify_rx;
        call->user_call_ID = user_call_ID;
+       rxrpc_get_call(call, rxrpc_call_got_userid);
        rb_link_node(&call->sock_node, parent, pp);
        rb_insert_color(&call->sock_node, &rx->calls);
        if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
                BUG();
-       if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events))
-               BUG();
-       rxrpc_queue_call(call);
 
-       rxrpc_get_call(call);
        write_unlock_bh(&call->state_lock);
        write_unlock(&rx->call_lock);
+       rxrpc_notify_socket(call);
+       rxrpc_service_prealloc(rx, GFP_KERNEL);
        _leave(" = %p{%d}", call, call->debug_id);
        return call;
 
-       /* if the call is already dying or dead, then we leave the socket's ref
-        * on it to be released by rxrpc_dead_call_expired() as induced by
-        * rxrpc_release_call() */
 out_release:
        _debug("release %p", call);
-       if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-           !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-               rxrpc_queue_call(call);
-out_discard:
        write_unlock_bh(&call->state_lock);
-       _debug("discard %p", call);
-out:
        write_unlock(&rx->call_lock);
+       rxrpc_release_call(rx, call);
+       rxrpc_put_call(call, rxrpc_call_put);
+       goto out;
+
+id_in_use:
+       ret = -EBADSLT;
+       write_unlock(&rx->call_lock);
+out:
+       rxrpc_service_prealloc(rx, GFP_KERNEL);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
 }
@@ -382,6 +516,7 @@ out:
 int rxrpc_reject_call(struct rxrpc_sock *rx)
 {
        struct rxrpc_call *call;
+       bool abort = false;
        int ret;
 
        _enter("");
@@ -390,88 +525,73 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
 
        write_lock(&rx->call_lock);
 
-       ret = -ENODATA;
-       if (list_empty(&rx->acceptq))
-               goto out;
+       if (list_empty(&rx->to_be_accepted)) {
+               write_unlock(&rx->call_lock);
+               return -ENODATA;
+       }
 
-       /* dequeue the first call and check it's still valid */
-       call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+       /* Dequeue the first call and check it's still valid.  We gain
+        * responsibility for the queue's reference.
+        */
+       call = list_entry(rx->to_be_accepted.next,
+                         struct rxrpc_call, accept_link);
        list_del_init(&call->accept_link);
        sk_acceptq_removed(&rx->sk);
+       rxrpc_see_call(call);
 
        write_lock_bh(&call->state_lock);
        switch (call->state) {
        case RXRPC_CALL_SERVER_ACCEPTING:
-               call->state = RXRPC_CALL_SERVER_BUSY;
-               if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events))
-                       rxrpc_queue_call(call);
-               ret = 0;
-               goto out_release;
-       case RXRPC_CALL_REMOTELY_ABORTED:
-       case RXRPC_CALL_LOCALLY_ABORTED:
-               ret = -ECONNABORTED;
-               goto out_release;
-       case RXRPC_CALL_NETWORK_ERROR:
-               ret = call->conn->error;
-               goto out_release;
-       case RXRPC_CALL_DEAD:
-               ret = -ETIME;
+               __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+               abort = true;
+               /* fall through */
+       case RXRPC_CALL_COMPLETE:
+               ret = call->error;
                goto out_discard;
        default:
                BUG();
        }
 
-       /* if the call is already dying or dead, then we leave the socket's ref
-        * on it to be released by rxrpc_dead_call_expired() as induced by
-        * rxrpc_release_call() */
-out_release:
-       _debug("release %p", call);
-       if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-           !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-               rxrpc_queue_call(call);
 out_discard:
        write_unlock_bh(&call->state_lock);
-       _debug("discard %p", call);
-out:
        write_unlock(&rx->call_lock);
+       if (abort) {
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+               rxrpc_release_call(rx, call);
+               rxrpc_put_call(call, rxrpc_call_put);
+       }
+       rxrpc_service_prealloc(rx, GFP_KERNEL);
        _leave(" = %d", ret);
        return ret;
 }
 
-/**
- * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
- * @sock: The socket on which the impending call is waiting
- * @user_call_ID: The tag to attach to the call
+/**
+ * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls
+ * @sock: The socket on which to preallocate
+ * @notify_rx: Event notification function for the call
+ * @user_attach_call: Func to attach call to user_call_ID
+ * @user_call_ID: The tag to attach to the preallocated call
+ * @gfp: The allocation conditions.
  *
- * Allow a kernel service to accept an incoming call, assuming the incoming
- * call is still valid.
- */
-struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
-                                           unsigned long user_call_ID)
-{
-       struct rxrpc_call *call;
-
-       _enter(",%lx", user_call_ID);
-       call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
-       _leave(" = %p", call);
-       return call;
-}
-EXPORT_SYMBOL(rxrpc_kernel_accept_call);
-
-/**
- * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
- * @sock: The socket on which the impending call is waiting
+ * Charge up the socket with preallocated calls, each with a user ID.  A
+ * function should be provided to effect the attachment from the user's side.
+ * The user is given a ref to hold on the call.
  *
- * Allow a kernel service to reject an incoming call with a BUSY message,
- * assuming the incoming call is still valid.
+ * Note that the call may become connected before this function returns.
  */
-int rxrpc_kernel_reject_call(struct socket *sock)
+int rxrpc_kernel_charge_accept(struct socket *sock,
+                              rxrpc_notify_rx_t notify_rx,
+                              rxrpc_user_attach_call_t user_attach_call,
+                              unsigned long user_call_ID, gfp_t gfp)
 {
-       int ret;
+       struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+       struct rxrpc_backlog *b = rx->backlog;
 
-       _enter("");
-       ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
-       _leave(" = %d", ret);
-       return ret;
+       if (sock->sk->sk_state == RXRPC_CLOSE)
+               return -ESHUTDOWN;
+
+       return rxrpc_service_prealloc_one(rx, b, notify_rx,
+                                         user_attach_call, user_call_ID,
+                                         gfp);
 }
-EXPORT_SYMBOL(rxrpc_kernel_reject_call);
+EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
index e60cf65..6143204 100644 (file)
 #include "ar-internal.h"
 
 /*
- * propose an ACK be sent
+ * Set the timer
  */
-void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-                        u32 serial, bool immediate)
+static void rxrpc_set_timer(struct rxrpc_call *call)
 {
-       unsigned long expiry;
-       s8 prior = rxrpc_ack_priority[ack_reason];
-
-       ASSERTCMP(prior, >, 0);
-
-       _enter("{%d},%s,%%%x,%u",
-              call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
-
-       if (prior < rxrpc_ack_priority[call->ackr_reason]) {
-               if (immediate)
-                       goto cancel_timer;
-               return;
-       }
-
-       /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
-        * numbers */
-       if (prior == rxrpc_ack_priority[call->ackr_reason]) {
-               if (prior <= 4)
-                       call->ackr_serial = serial;
-               if (immediate)
-                       goto cancel_timer;
-               return;
-       }
+       unsigned long t, now = jiffies;
 
-       call->ackr_reason = ack_reason;
-       call->ackr_serial = serial;
-
-       switch (ack_reason) {
-       case RXRPC_ACK_DELAY:
-               _debug("run delay timer");
-               expiry = rxrpc_soft_ack_delay;
-               goto run_timer;
-
-       case RXRPC_ACK_IDLE:
-               if (!immediate) {
-                       _debug("run defer timer");
-                       expiry = rxrpc_idle_ack_delay;
-                       goto run_timer;
-               }
-               goto cancel_timer;
+       _enter("{%ld,%ld,%ld:%ld}",
+              call->ack_at - now, call->resend_at - now, call->expire_at - now,
+              call->timer.expires - now);
+
+       read_lock_bh(&call->state_lock);
 
-       case RXRPC_ACK_REQUESTED:
-               expiry = rxrpc_requested_ack_delay;
-               if (!expiry)
-                       goto cancel_timer;
-               if (!immediate || serial == 1) {
-                       _debug("run defer timer");
-                       goto run_timer;
+       if (call->state < RXRPC_CALL_COMPLETE) {
+               t = call->ack_at;
+               if (time_before(call->resend_at, t))
+                       t = call->resend_at;
+               if (time_before(call->expire_at, t))
+                       t = call->expire_at;
+               if (!timer_pending(&call->timer) ||
+                   time_before(t, call->timer.expires)) {
+                       _debug("set timer %ld", t - now);
+                       mod_timer(&call->timer, t);
                }
-
-       default:
-               _debug("immediate ACK");
-               goto cancel_timer;
        }
-
-run_timer:
-       expiry += jiffies;
-       if (!timer_pending(&call->ack_timer) ||
-           time_after(call->ack_timer.expires, expiry))
-               mod_timer(&call->ack_timer, expiry);
-       return;
-
-cancel_timer:
-       _debug("cancel timer %%%u", serial);
-       try_to_del_timer_sync(&call->ack_timer);
-       read_lock_bh(&call->state_lock);
-       if (call->state <= RXRPC_CALL_COMPLETE &&
-           !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
-               rxrpc_queue_call(call);
        read_unlock_bh(&call->state_lock);
 }
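
rxrpc_set_timer() folds the call's three deadlines (ack_at, resend_at, expire_at) into a single timer by picking the earliest with time_before(), the same signed-difference comparison as the sequence helpers earlier in this patch, and only re-arms the timer when it is idle or the new deadline is sooner. A hypothetical standalone helper showing the wrap-safe earliest-deadline fold:

#include <assert.h>
#include <stdio.h>

/* Earliest of three jiffies-style deadlines, using the wrap-safe
 * signed-difference comparison that time_before() is built on.
 */
static unsigned long earliest_deadline(unsigned long ack_at,
                                       unsigned long resend_at,
                                       unsigned long expire_at)
{
        unsigned long t = ack_at;

        if ((long)(resend_at - t) < 0)          /* resend_at before t */
                t = resend_at;
        if ((long)(expire_at - t) < 0)          /* expire_at before t */
                t = expire_at;
        return t;
}

int main(void)
{
        /* A deadline just below the wrap point is "before" small
         * post-wrap values.
         */
        unsigned long near_wrap = (unsigned long)-16;

        assert(earliest_deadline(0x10, near_wrap, 0x20) == near_wrap);
        assert(earliest_deadline(100, 200, 50) == 50);
        printf("deadline fold ok\n");
        return 0;
}
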
 
 /*
- * propose an ACK be sent, locking the call structure
+ * propose an ACK be sent
  */
-void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-                      u32 serial, bool immediate)
+static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
+                               u16 skew, u32 serial, bool immediate,
+                               bool background)
 {
+       unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay;
        s8 prior = rxrpc_ack_priority[ack_reason];
 
-       if (prior > rxrpc_ack_priority[call->ackr_reason]) {
-               spin_lock_bh(&call->lock);
-               __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
-               spin_unlock_bh(&call->lock);
-       }
-}
-
-/*
- * set the resend timer
- */
-static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
-                            unsigned long resend_at)
-{
-       read_lock_bh(&call->state_lock);
-       if (call->state >= RXRPC_CALL_COMPLETE)
-               resend = 0;
-
-       if (resend & 1) {
-               _debug("SET RESEND");
-               set_bit(RXRPC_CALL_EV_RESEND, &call->events);
-       }
-
-       if (resend & 2) {
-               _debug("MODIFY RESEND TIMER");
-               set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-               mod_timer(&call->resend_timer, resend_at);
-       } else {
-               _debug("KILL RESEND TIMER");
-               del_timer_sync(&call->resend_timer);
-               clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-               clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-       }
-       read_unlock_bh(&call->state_lock);
-}
-
-/*
- * resend packets
- */
-static void rxrpc_resend(struct rxrpc_call *call)
-{
-       struct rxrpc_wire_header *whdr;
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *txb;
-       unsigned long *p_txb, resend_at;
-       bool stop;
-       int loop;
-       u8 resend;
-
-       _enter("{%d,%d,%d,%d},",
-              call->acks_hard, call->acks_unacked,
-              atomic_read(&call->sequence),
-              CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
-
-       stop = false;
-       resend = 0;
-       resend_at = 0;
-
-       for (loop = call->acks_tail;
-            loop != call->acks_head || stop;
-            loop = (loop + 1) &  (call->acks_winsz - 1)
-            ) {
-               p_txb = call->acks_window + loop;
-               smp_read_barrier_depends();
-               if (*p_txb & 1)
-                       continue;
-
-               txb = (struct sk_buff *) *p_txb;
-               sp = rxrpc_skb(txb);
-
-               if (sp->need_resend) {
-                       sp->need_resend = false;
-
-                       /* each Tx packet has a new serial number */
-                       sp->hdr.serial = atomic_inc_return(&call->conn->serial);
-
-                       whdr = (struct rxrpc_wire_header *)txb->head;
-                       whdr->serial = htonl(sp->hdr.serial);
-
-                       _proto("Tx DATA %%%u { #%d }",
-                              sp->hdr.serial, sp->hdr.seq);
-                       if (rxrpc_send_data_packet(call->conn, txb) < 0) {
-                               stop = true;
-                               sp->resend_at = jiffies + 3;
-                       } else {
-                               sp->resend_at =
-                                       jiffies + rxrpc_resend_timeout;
-                       }
-               }
-
-               if (time_after_eq(jiffies + 1, sp->resend_at)) {
-                       sp->need_resend = true;
-                       resend |= 1;
-               } else if (resend & 2) {
-                       if (time_before(sp->resend_at, resend_at))
-                               resend_at = sp->resend_at;
-               } else {
-                       resend_at = sp->resend_at;
-                       resend |= 2;
-               }
-       }
-
-       rxrpc_set_resend(call, resend, resend_at);
-       _leave("");
-}
-
-/*
- * handle resend timer expiry
- */
-static void rxrpc_resend_timer(struct rxrpc_call *call)
-{
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *txb;
-       unsigned long *p_txb, resend_at;
-       int loop;
-       u8 resend;
-
-       _enter("%d,%d,%d",
-              call->acks_tail, call->acks_unacked, call->acks_head);
-
-       if (call->state >= RXRPC_CALL_COMPLETE)
-               return;
-
-       resend = 0;
-       resend_at = 0;
-
-       for (loop = call->acks_unacked;
-            loop != call->acks_head;
-            loop = (loop + 1) &  (call->acks_winsz - 1)
-            ) {
-               p_txb = call->acks_window + loop;
-               smp_read_barrier_depends();
-               txb = (struct sk_buff *) (*p_txb & ~1);
-               sp = rxrpc_skb(txb);
-
-               ASSERT(!(*p_txb & 1));
-
-               if (sp->need_resend) {
-                       ;
-               } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
-                       sp->need_resend = true;
-                       resend |= 1;
-               } else if (resend & 2) {
-                       if (time_before(sp->resend_at, resend_at))
-                               resend_at = sp->resend_at;
-               } else {
-                       resend_at = sp->resend_at;
-                       resend |= 2;
-               }
-       }
-
-       rxrpc_set_resend(call, resend, resend_at);
-       _leave("");
-}
-
-/*
- * process soft ACKs of our transmitted packets
- * - these indicate packets the peer has or has not received, but hasn't yet
- *   given to the consumer, and so can still be discarded and re-requested
- */
-static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
-                                  struct rxrpc_ackpacket *ack,
-                                  struct sk_buff *skb)
-{
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *txb;
-       unsigned long *p_txb, resend_at;
-       int loop;
-       u8 sacks[RXRPC_MAXACKS], resend;
-
-       _enter("{%d,%d},{%d},",
-              call->acks_hard,
-              CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
-              ack->nAcks);
-
-       if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
-               goto protocol_error;
-
-       resend = 0;
-       resend_at = 0;
-       for (loop = 0; loop < ack->nAcks; loop++) {
-               p_txb = call->acks_window;
-               p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
-               smp_read_barrier_depends();
-               txb = (struct sk_buff *) (*p_txb & ~1);
-               sp = rxrpc_skb(txb);
-
-               switch (sacks[loop]) {
-               case RXRPC_ACK_TYPE_ACK:
-                       sp->need_resend = false;
-                       *p_txb |= 1;
-                       break;
-               case RXRPC_ACK_TYPE_NACK:
-                       sp->need_resend = true;
-                       *p_txb &= ~1;
-                       resend = 1;
-                       break;
-               default:
-                       _debug("Unsupported ACK type %d", sacks[loop]);
-                       goto protocol_error;
-               }
-       }
-
-       smp_mb();
-       call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
-
-       /* anything not explicitly ACK'd is implicitly NACK'd, but may just not
-        * have been received or processed yet by the far end */
-       for (loop = call->acks_unacked;
-            loop != call->acks_head;
-            loop = (loop + 1) &  (call->acks_winsz - 1)
-            ) {
-               p_txb = call->acks_window + loop;
-               smp_read_barrier_depends();
-               txb = (struct sk_buff *) (*p_txb & ~1);
-               sp = rxrpc_skb(txb);
+       _enter("{%d},%s,%%%x,%u",
+              call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
 
-               if (*p_txb & 1) {
-                       /* packet must have been discarded */
-                       sp->need_resend = true;
-                       *p_txb &= ~1;
-                       resend |= 1;
-               } else if (sp->need_resend) {
-                       ;
-               } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
-                       sp->need_resend = true;
-                       resend |= 1;
-               } else if (resend & 2) {
-                       if (time_before(sp->resend_at, resend_at))
-                               resend_at = sp->resend_at;
-               } else {
-                       resend_at = sp->resend_at;
-                       resend |= 2;
+       /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+        * numbers, but we don't alter the timeout.
+        */
+       _debug("prior %u %u vs %u %u",
+              ack_reason, prior,
+              call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]);
+       if (ack_reason == call->ackr_reason) {
+               if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
+                       call->ackr_serial = serial;
+                       call->ackr_skew = skew;
                }
+               if (!immediate)
+                       return;
+       } else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
+               call->ackr_reason = ack_reason;
+               call->ackr_serial = serial;
+               call->ackr_skew = skew;
        }
 
-       rxrpc_set_resend(call, resend, resend_at);
-       _leave(" = 0");
-       return 0;
-
-protocol_error:
-       _leave(" = -EPROTO");
-       return -EPROTO;
-}
-
-/*
- * discard hard-ACK'd packets from the Tx window
- */
-static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
-{
-       unsigned long _skb;
-       int tail = call->acks_tail, old_tail;
-       int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+       switch (ack_reason) {
+       case RXRPC_ACK_REQUESTED:
+               if (rxrpc_requested_ack_delay < expiry)
+                       expiry = rxrpc_requested_ack_delay;
+               if (serial == 1)
+                       immediate = false;
+               break;
 
-       _enter("{%u,%u},%u", call->acks_hard, win, hard);
+       case RXRPC_ACK_DELAY:
+               if (rxrpc_soft_ack_delay < expiry)
+                       expiry = rxrpc_soft_ack_delay;
+               break;
 
-       ASSERTCMP(hard - call->acks_hard, <=, win);
+       case RXRPC_ACK_IDLE:
+               if (rxrpc_idle_ack_delay < expiry)
+                       expiry = rxrpc_idle_ack_delay;
+               break;
 
-       while (call->acks_hard < hard) {
-               smp_read_barrier_depends();
-               _skb = call->acks_window[tail] & ~1;
-               rxrpc_free_skb((struct sk_buff *) _skb);
-               old_tail = tail;
-               tail = (tail + 1) & (call->acks_winsz - 1);
-               call->acks_tail = tail;
-               if (call->acks_unacked == old_tail)
-                       call->acks_unacked = tail;
-               call->acks_hard++;
+       default:
+               immediate = true;
+               break;
        }
 
-       wake_up(&call->tx_waitq);
-}
-
-/*
- * clear the Tx window in the event of a failure
- */
-static void rxrpc_clear_tx_window(struct rxrpc_call *call)
-{
-       rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
-}
-
-/*
- * drain the out of sequence received packet queue into the packet Rx queue
- */
-static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
-{
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *skb;
-       bool terminal;
-       int ret;
-
-       _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
-
-       spin_lock_bh(&call->lock);
-
-       ret = -ECONNRESET;
-       if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
-               goto socket_unavailable;
-
-       skb = skb_dequeue(&call->rx_oos_queue);
-       if (skb) {
-               sp = rxrpc_skb(skb);
-
-               _debug("drain OOS packet %d [%d]",
-                      sp->hdr.seq, call->rx_first_oos);
-
-               if (sp->hdr.seq != call->rx_first_oos) {
-                       skb_queue_head(&call->rx_oos_queue, skb);
-                       call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
-                       _debug("requeue %p {%u}", skb, call->rx_first_oos);
-               } else {
-                       skb->mark = RXRPC_SKB_MARK_DATA;
-                       terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-                               !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
-                       ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
-                       BUG_ON(ret < 0);
-                       _debug("drain #%u", call->rx_data_post);
-                       call->rx_data_post++;
-
-                       /* find out what the next packet is */
-                       skb = skb_peek(&call->rx_oos_queue);
-                       if (skb)
-                               call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
-                       else
-                               call->rx_first_oos = 0;
-                       _debug("peek %p {%u}", skb, call->rx_first_oos);
+       now = jiffies;
+       if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+               _debug("already scheduled");
+       } else if (immediate || expiry == 0) {
+               _debug("immediate ACK %lx", call->events);
+               if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) &&
+                   background)
+                       rxrpc_queue_call(call);
+       } else {
+               ack_at = now + expiry;
+               _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now);
+               if (time_before(ack_at, call->ack_at)) {
+                       call->ack_at = ack_at;
+                       rxrpc_set_timer(call);
                }
        }
-
-       ret = 0;
-socket_unavailable:
-       spin_unlock_bh(&call->lock);
-       _leave(" = %d", ret);
-       return ret;
 }
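
The rewritten proposal path treats an ACK with the same pending reason as updateable (refreshing only the recorded serial and skew), replaces the pending reason only on strictly higher priority, and otherwise just clamps the expiry per reason. The RXRPC_ACK_UPDATEABLE mask tested above is not shown in this hunk; a plausible reconstruction, assumed rather than quoted from the patch, is one bit per updateable reason:

/* Assumed definition (ar-internal.h): one bit per ACK reason whose
 * pending proposal may have its serial/skew refreshed in place.
 */
#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED)       |       \
                              (1 << RXRPC_ACK_PING_RESPONSE)   |       \
                              (1 << RXRPC_ACK_DELAY)           |       \
                              (1 << RXRPC_ACK_IDLE))
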
 
 /*
- * insert an out of sequence packet into the buffer
+ * propose an ACK be sent, locking the call structure
  */
-static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
-                                   struct sk_buff *skb)
+void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
+                      u16 skew, u32 serial, bool immediate, bool background)
 {
-       struct rxrpc_skb_priv *sp, *psp;
-       struct sk_buff *p;
-       u32 seq;
-
-       sp = rxrpc_skb(skb);
-       seq = sp->hdr.seq;
-       _enter(",,{%u}", seq);
-
-       skb->destructor = rxrpc_packet_destructor;
-       ASSERTCMP(sp->call, ==, NULL);
-       sp->call = call;
-       rxrpc_get_call(call);
-       atomic_inc(&call->skb_count);
-
-       /* insert into the buffer in sequence order */
        spin_lock_bh(&call->lock);
-
-       skb_queue_walk(&call->rx_oos_queue, p) {
-               psp = rxrpc_skb(p);
-               if (psp->hdr.seq > seq) {
-                       _debug("insert oos #%u before #%u", seq, psp->hdr.seq);
-                       skb_insert(p, skb, &call->rx_oos_queue);
-                       goto inserted;
-               }
-       }
-
-       _debug("append oos #%u", seq);
-       skb_queue_tail(&call->rx_oos_queue, skb);
-inserted:
-
-       /* we might now have a new front to the queue */
-       if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
-               call->rx_first_oos = seq;
-
-       read_lock(&call->state_lock);
-       if (call->state < RXRPC_CALL_COMPLETE &&
-           call->rx_data_post == call->rx_first_oos) {
-               _debug("drain rx oos now");
-               set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
-       }
-       read_unlock(&call->state_lock);
-
+       __rxrpc_propose_ACK(call, ack_reason, skew, serial,
+                           immediate, background);
        spin_unlock_bh(&call->lock);
-       _leave(" [stored #%u]", call->rx_first_oos);
 }
 
 /*
- * clear the Tx window on final ACK reception
+ * Perform retransmission of NAK'd and unack'd packets.
  */
-static void rxrpc_zap_tx_window(struct rxrpc_call *call)
-{
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *skb;
-       unsigned long _skb, *acks_window;
-       u8 winsz = call->acks_winsz;
-       int tail;
-
-       acks_window = call->acks_window;
-       call->acks_window = NULL;
-
-       while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
-               tail = call->acks_tail;
-               smp_read_barrier_depends();
-               _skb = acks_window[tail] & ~1;
-               smp_mb();
-               call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
-
-               skb = (struct sk_buff *) _skb;
-               sp = rxrpc_skb(skb);
-               _debug("+++ clear Tx %u", sp->hdr.seq);
-               rxrpc_free_skb(skb);
-       }
-
-       kfree(acks_window);
-}
-
-/*
- * process the extra information that may be appended to an ACK packet
- */
-static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
-                                 unsigned int latest, int nAcks)
-{
-       struct rxrpc_ackinfo ackinfo;
-       struct rxrpc_peer *peer;
-       unsigned int mtu;
-
-       if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
-               _leave(" [no ackinfo]");
-               return;
-       }
-
-       _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
-              latest,
-              ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
-              ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
-
-       mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
-
-       peer = call->conn->params.peer;
-       if (mtu < peer->maxdata) {
-               spin_lock_bh(&peer->lock);
-               peer->maxdata = mtu;
-               peer->mtu = mtu + peer->hdrsize;
-               spin_unlock_bh(&peer->lock);
-               _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
-       }
-}
-
-/*
- * process packets in the reception queue
- */
-static int rxrpc_process_rx_queue(struct rxrpc_call *call,
-                                 u32 *_abort_code)
+static void rxrpc_resend(struct rxrpc_call *call)
 {
-       struct rxrpc_ackpacket ack;
+       struct rxrpc_wire_header *whdr;
        struct rxrpc_skb_priv *sp;
        struct sk_buff *skb;
-       bool post_ACK;
-       int latest;
-       u32 hard, tx;
-
-       _enter("");
-
-process_further:
-       skb = skb_dequeue(&call->rx_queue);
-       if (!skb)
-               return -EAGAIN;
-
-       _net("deferred skb %p", skb);
-
-       sp = rxrpc_skb(skb);
-
-       _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
-
-       post_ACK = false;
-
-       switch (sp->hdr.type) {
-               /* data packets that wind up here have been received out of
-                * order, need security processing or are jumbo packets */
-       case RXRPC_PACKET_TYPE_DATA:
-               _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-               /* secured packets must be verified and possibly decrypted */
-               if (call->conn->security->verify_packet(call, skb,
-                                                       _abort_code) < 0)
-                       goto protocol_error;
-
-               rxrpc_insert_oos_packet(call, skb);
-               goto process_further;
-
-               /* partial ACK to process */
-       case RXRPC_PACKET_TYPE_ACK:
-               if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
-                       _debug("extraction failure");
-                       goto protocol_error;
-               }
-               if (!skb_pull(skb, sizeof(ack)))
-                       BUG();
-
-               latest = sp->hdr.serial;
-               hard = ntohl(ack.firstPacket);
-               tx = atomic_read(&call->sequence);
+       rxrpc_seq_t cursor, seq, top;
+       unsigned long resend_at, now;
+       int ix;
+       u8 annotation;
 
-               _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-                      latest,
-                      ntohs(ack.maxSkew),
-                      hard,
-                      ntohl(ack.previousPacket),
-                      ntohl(ack.serial),
-                      rxrpc_acks(ack.reason),
-                      ack.nAcks);
+       _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
 
-               rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
-
-               if (ack.reason == RXRPC_ACK_PING) {
-                       _proto("Rx ACK %%%u PING Request", latest);
-                       rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
-                                         sp->hdr.serial, true);
-               }
-
-               /* discard any out-of-order or duplicate ACKs */
-               if (latest - call->acks_latest <= 0) {
-                       _debug("discard ACK %d <= %d",
-                              latest, call->acks_latest);
-                       goto discard;
-               }
-               call->acks_latest = latest;
-
-               if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-                   call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
-                   call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
-                   call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
-                       goto discard;
-
-               _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
-
-               if (hard > 0) {
-                       if (hard - 1 > tx) {
-                               _debug("hard-ACK'd packet %d not transmitted"
-                                      " (%d top)",
-                                      hard - 1, tx);
-                               goto protocol_error;
-                       }
+       spin_lock_bh(&call->lock);
 
-                       if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
-                            call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
-                           hard > tx) {
-                               call->acks_hard = tx;
-                               goto all_acked;
-                       }
+       cursor = call->tx_hard_ack;
+       top = call->tx_top;
+       ASSERT(before_eq(cursor, top));
+       if (cursor == top)
+               goto out_unlock;
+
+       /* Scan the packet list without dropping the lock and decide which of
+        * the packets in the Tx buffer we're going to resend and what the new
+        * resend timeout will be.
+        */
+       now = jiffies;
+       resend_at = now + rxrpc_resend_timeout;
+       seq = cursor + 1;
+       do {
+               ix = seq & RXRPC_RXTX_BUFF_MASK;
+               annotation = call->rxtx_annotations[ix];
+               if (annotation == RXRPC_TX_ANNO_ACK)
+                       continue;
 
-                       smp_rmb();
-                       rxrpc_rotate_tx_window(call, hard - 1);
-               }
+               skb = call->rxtx_buffer[ix];
+               rxrpc_see_skb(skb);
+               sp = rxrpc_skb(skb);
 
-               if (ack.nAcks > 0) {
-                       if (hard - 1 + ack.nAcks > tx) {
-                               _debug("soft-ACK'd packet %d+%d not"
-                                      " transmitted (%d top)",
-                                      hard - 1, ack.nAcks, tx);
-                               goto protocol_error;
+               if (annotation == RXRPC_TX_ANNO_UNACK) {
+                       if (time_after(sp->resend_at, now)) {
+                               if (time_before(sp->resend_at, resend_at))
+                                       resend_at = sp->resend_at;
+                               continue;
                        }
-
-                       if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
-                               goto protocol_error;
                }
-               goto discard;
-
-               /* complete ACK to process */
-       case RXRPC_PACKET_TYPE_ACKALL:
-               goto all_acked;
-
-               /* abort and busy are handled elsewhere */
-       case RXRPC_PACKET_TYPE_BUSY:
-       case RXRPC_PACKET_TYPE_ABORT:
-               BUG();
-
-               /* connection level events - also handled elsewhere */
-       case RXRPC_PACKET_TYPE_CHALLENGE:
-       case RXRPC_PACKET_TYPE_RESPONSE:
-       case RXRPC_PACKET_TYPE_DEBUG:
-               BUG();
-       }
-
-       /* if we've had a hard ACK that covers all the packets we've sent, then
-        * that ends that phase of the operation */
-all_acked:
-       write_lock_bh(&call->state_lock);
-       _debug("ack all %d", call->state);
-
-       switch (call->state) {
-       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-               call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
-               break;
-       case RXRPC_CALL_SERVER_AWAIT_ACK:
-               _debug("srv complete");
-               call->state = RXRPC_CALL_COMPLETE;
-               post_ACK = true;
-               break;
-       case RXRPC_CALL_CLIENT_SEND_REQUEST:
-       case RXRPC_CALL_SERVER_RECV_REQUEST:
-               goto protocol_error_unlock; /* can't occur yet */
-       default:
-               write_unlock_bh(&call->state_lock);
-               goto discard; /* assume packet left over from earlier phase */
-       }
-
-       write_unlock_bh(&call->state_lock);
-
-       /* if all the packets we sent are hard-ACK'd, then we can discard
-        * whatever we've got left */
-       _debug("clear Tx %d",
-              CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
-
-       del_timer_sync(&call->resend_timer);
-       clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-       clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
 
-       if (call->acks_window)
-               rxrpc_zap_tx_window(call);
+               /* Okay, we need to retransmit a packet. */
+               call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+               seq++;
+       } while (before_eq(seq, top));
+
+       call->resend_at = resend_at;
+
+       /* Now go through the Tx window and perform the retransmissions.  We
+        * have to drop the lock for each send.  If an ACK comes in whilst the
+        * lock is dropped, it may clear some of the retransmission markers for
+        * packets that it soft-ACKs.
+        */
+       seq = cursor + 1;
+       do {
+               ix = seq & RXRPC_RXTX_BUFF_MASK;
+               annotation = call->rxtx_annotations[ix];
+               if (annotation != RXRPC_TX_ANNO_RETRANS)
+                       continue;
 
-       if (post_ACK) {
-               /* post the final ACK message for userspace to pick up */
-               _debug("post ACK");
-               skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
-               sp->call = call;
-               rxrpc_get_call(call);
-               atomic_inc(&call->skb_count);
-               spin_lock_bh(&call->lock);
-               if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
-                       BUG();
+               skb = call->rxtx_buffer[ix];
+               rxrpc_get_skb(skb);
                spin_unlock_bh(&call->lock);
-               goto process_further;
-       }
-
-discard:
-       rxrpc_free_skb(skb);
-       goto process_further;
-
-protocol_error_unlock:
-       write_unlock_bh(&call->state_lock);
-protocol_error:
-       rxrpc_free_skb(skb);
-       _leave(" = -EPROTO");
-       return -EPROTO;
-}
-
-/*
- * post a message to the socket Rx queue for recvmsg() to pick up
- */
-static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
-                             bool fatal)
-{
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *skb;
-       int ret;
-
-       _enter("{%d,%lx},%u,%u,%d",
-              call->debug_id, call->flags, mark, error, fatal);
-
-       /* remove timers and things for fatal messages */
-       if (fatal) {
-               del_timer_sync(&call->resend_timer);
-               del_timer_sync(&call->ack_timer);
-               clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-       }
-
-       if (mark != RXRPC_SKB_MARK_NEW_CALL &&
-           !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
-               _leave("[no userid]");
-               return 0;
-       }
+               sp = rxrpc_skb(skb);
 
-       if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
-               skb = alloc_skb(0, GFP_NOFS);
-               if (!skb)
-                       return -ENOMEM;
+               /* Each Tx packet needs a new serial number */
+               sp->hdr.serial = atomic_inc_return(&call->conn->serial);
 
-               rxrpc_new_skb(skb);
+               whdr = (struct rxrpc_wire_header *)skb->head;
+               whdr->serial = htonl(sp->hdr.serial);
 
-               skb->mark = mark;
+               if (rxrpc_send_data_packet(call->conn, skb) < 0) {
+                       call->resend_at = now + 2;
+                       rxrpc_free_skb(skb);
+                       return;
+               }
 
-               sp = rxrpc_skb(skb);
-               memset(sp, 0, sizeof(*sp));
-               sp->error = error;
-               sp->call = call;
-               rxrpc_get_call(call);
-               atomic_inc(&call->skb_count);
+               if (rxrpc_is_client_call(call))
+                       rxrpc_expose_client_call(call);
+               sp->resend_at = now + rxrpc_resend_timeout;
 
+               rxrpc_free_skb(skb);
                spin_lock_bh(&call->lock);
-               ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
-               spin_unlock_bh(&call->lock);
-               BUG_ON(ret < 0);
-       }
 
-       return 0;
+               /* We need to clear the retransmit state, but there are two
+                * things we need to be aware of: A new ACK/NAK might have been
+                * received and the packet might have been hard-ACK'd (in which
+                * case it will no longer be in the buffer).
+                */
+               if (after(seq, call->tx_hard_ack) &&
+                   (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS ||
+                    call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK))
+                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK;
+
+               if (after(call->tx_hard_ack, seq))
+                       seq = call->tx_hard_ack;
+               seq++;
+       } while (before_eq(seq, top));
+
+out_unlock:
+       spin_unlock_bh(&call->lock);
+       _leave("");
 }
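
rxrpc_resend() indexes the Tx ring by masking the sequence number with RXRPC_RXTX_BUFF_MASK, which only works if the ring size is a power of two. A sketch of that indexing (the size shown is an assumption for illustration, not taken from this hunk):

/* Power-of-two ring indexing as used by rxrpc_resend().  The size must
 * be a power of two so the mask maps any 32-bit sequence number onto a
 * valid slot.
 */
#define RXRPC_RXTX_BUFF_SIZE	64	/* assumed value for illustration */
#define RXRPC_RXTX_BUFF_MASK	(RXRPC_RXTX_BUFF_SIZE - 1)

static inline int seq_to_slot(u32 seq)
{
	return seq & RXRPC_RXTX_BUFF_MASK;
}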
 
 /*
- * handle background processing of incoming call packets and ACK / abort
- * generation
+ * Handle retransmission and deferred ACK/abort generation.
  */
 void rxrpc_process_call(struct work_struct *work)
 {
        struct rxrpc_call *call =
                container_of(work, struct rxrpc_call, processor);
-       struct rxrpc_wire_header whdr;
-       struct rxrpc_ackpacket ack;
-       struct rxrpc_ackinfo ackinfo;
-       struct msghdr msg;
-       struct kvec iov[5];
-       enum rxrpc_call_event genbit;
-       unsigned long bits;
-       __be32 data, pad;
-       size_t len;
-       int loop, nbit, ioc, ret, mtu;
-       u32 serial, abort_code = RX_PROTOCOL_ERROR;
-       u8 *acks = NULL;
+       unsigned long now;
 
-       //printk("\n--------------------\n");
-       _enter("{%d,%s,%lx} [%lu]",
-              call->debug_id, rxrpc_call_states[call->state], call->events,
-              (jiffies - call->creation_jif) / (HZ / 10));
+       rxrpc_see_call(call);
 
-       if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
-               _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
-               return;
-       }
-
-       if (!call->conn)
-               goto skip_msg_init;
-
-       /* there's a good chance we're going to have to send a message, so set
-        * one up in advance */
-       msg.msg_name    = &call->conn->params.peer->srx.transport;
-       msg.msg_namelen = call->conn->params.peer->srx.transport_len;
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_flags   = 0;
-
-       whdr.epoch      = htonl(call->conn->proto.epoch);
-       whdr.cid        = htonl(call->cid);
-       whdr.callNumber = htonl(call->call_id);
-       whdr.seq        = 0;
-       whdr.type       = RXRPC_PACKET_TYPE_ACK;
-       whdr.flags      = call->conn->out_clientflag;
-       whdr.userStatus = 0;
-       whdr.securityIndex = call->conn->security_ix;
-       whdr._rsvd      = 0;
-       whdr.serviceId  = htons(call->service_id);
-
-       memset(iov, 0, sizeof(iov));
-       iov[0].iov_base = &whdr;
-       iov[0].iov_len  = sizeof(whdr);
-skip_msg_init:
-
-       /* deal with events of a final nature */
-       if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
-               enum rxrpc_skb_mark mark;
-               int error;
-
-               clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-               clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
-               clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
-
-               error = call->error_report;
-               if (error < RXRPC_LOCAL_ERROR_OFFSET) {
-                       mark = RXRPC_SKB_MARK_NET_ERROR;
-                       _debug("post net error %d", error);
-               } else {
-                       mark = RXRPC_SKB_MARK_LOCAL_ERROR;
-                       error -= RXRPC_LOCAL_ERROR_OFFSET;
-                       _debug("post net local error %d", error);
-               }
-
-               if (rxrpc_post_message(call, mark, error, true) < 0)
-                       goto no_mem;
-               clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
-               goto kill_ACKs;
-       }
-
-       if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) {
-               ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
-               clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
-               clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
-
-               _debug("post conn abort");
-
-               if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-                                      call->conn->error, true) < 0)
-                       goto no_mem;
-               clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-               goto kill_ACKs;
-       }
-
-       if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) {
-               whdr.type = RXRPC_PACKET_TYPE_BUSY;
-               genbit = RXRPC_CALL_EV_REJECT_BUSY;
-               goto send_message;
-       }
-
-       if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
-               ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
-               if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-                                      ECONNABORTED, true) < 0)
-                       goto no_mem;
-               whdr.type = RXRPC_PACKET_TYPE_ABORT;
-               data = htonl(call->local_abort);
-               iov[1].iov_base = &data;
-               iov[1].iov_len = sizeof(data);
-               genbit = RXRPC_CALL_EV_ABORT;
-               goto send_message;
-       }
-
-       if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) {
-               genbit = RXRPC_CALL_EV_ACK_FINAL;
-
-               ack.bufferSpace = htons(8);
-               ack.maxSkew     = 0;
-               ack.serial      = 0;
-               ack.reason      = RXRPC_ACK_IDLE;
-               ack.nAcks       = 0;
-               call->ackr_reason = 0;
-
-               spin_lock_bh(&call->lock);
-               ack.serial      = htonl(call->ackr_serial);
-               ack.previousPacket = htonl(call->ackr_prev_seq);
-               ack.firstPacket = htonl(call->rx_data_eaten + 1);
-               spin_unlock_bh(&call->lock);
-
-               pad = 0;
+       //printk("\n--------------------\n");
+       _enter("{%d,%s,%lx}",
+              call->debug_id, rxrpc_call_states[call->state], call->events);
 
-               iov[1].iov_base = &ack;
-               iov[1].iov_len  = sizeof(ack);
-               iov[2].iov_base = &pad;
-               iov[2].iov_len  = 3;
-               iov[3].iov_base = &ackinfo;
-               iov[3].iov_len  = sizeof(ackinfo);
-               goto send_ACK;
+recheck_state:
+       if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+               goto recheck_state;
        }
 
-       if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) |
-                           (1 << RXRPC_CALL_EV_RCVD_ABORT))
-           ) {
-               u32 mark;
-
-               if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events))
-                       mark = RXRPC_SKB_MARK_REMOTE_ABORT;
-               else
-                       mark = RXRPC_SKB_MARK_BUSY;
-
-               _debug("post abort/busy");
-               rxrpc_clear_tx_window(call);
-               if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
-                       goto no_mem;
-
-               clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
-               clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-               goto kill_ACKs;
+       if (call->state == RXRPC_CALL_COMPLETE) {
+               del_timer_sync(&call->timer);
+               goto out_put;
        }
 
-       if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) {
-               _debug("do implicit ackall");
-               rxrpc_clear_tx_window(call);
+       now = jiffies;
+       if (time_after_eq(now, call->expire_at)) {
+               rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
        }
 
-       if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) {
-               write_lock_bh(&call->state_lock);
-               if (call->state <= RXRPC_CALL_COMPLETE) {
-                       call->state = RXRPC_CALL_LOCALLY_ABORTED;
-                       call->local_abort = RX_CALL_TIMEOUT;
-                       set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+       if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
+           time_after_eq(now, call->ack_at)) {
+               call->ack_at = call->expire_at;
+               if (call->ackr_reason) {
+                       rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+                       goto recheck_state;
                }
-               write_unlock_bh(&call->state_lock);
-
-               _debug("post timeout");
-               if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-                                      ETIME, true) < 0)
-                       goto no_mem;
-
-               clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
-               goto kill_ACKs;
        }
 
-       /* deal with assorted inbound messages */
-       if (!skb_queue_empty(&call->rx_queue)) {
-               switch (rxrpc_process_rx_queue(call, &abort_code)) {
-               case 0:
-               case -EAGAIN:
-                       break;
-               case -ENOMEM:
-                       goto no_mem;
-               case -EKEYEXPIRED:
-               case -EKEYREJECTED:
-               case -EPROTO:
-                       rxrpc_abort_call(call, abort_code);
-                       goto kill_ACKs;
-               }
-       }
-
-       /* handle resending */
-       if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-               rxrpc_resend_timer(call);
-       if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events))
+       if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) ||
+           time_after_eq(now, call->resend_at)) {
                rxrpc_resend(call);
-
-       /* consider sending an ordinary ACK */
-       if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
-               _debug("send ACK: window: %d - %d { %lx }",
-                      call->rx_data_eaten, call->ackr_win_top,
-                      call->ackr_window[0]);
-
-               if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
-                   call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
-                       /* ACK by sending reply DATA packet in this state */
-                       clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-                       goto maybe_reschedule;
-               }
-
-               genbit = RXRPC_CALL_EV_ACK;
-
-               acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
-                              GFP_NOFS);
-               if (!acks)
-                       goto no_mem;
-
-               //hdr.flags     = RXRPC_SLOW_START_OK;
-               ack.bufferSpace = htons(8);
-               ack.maxSkew     = 0;
-
-               spin_lock_bh(&call->lock);
-               ack.reason      = call->ackr_reason;
-               ack.serial      = htonl(call->ackr_serial);
-               ack.previousPacket = htonl(call->ackr_prev_seq);
-               ack.firstPacket = htonl(call->rx_data_eaten + 1);
-
-               ack.nAcks = 0;
-               for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
-                       nbit = loop * BITS_PER_LONG;
-                       for (bits = call->ackr_window[loop]; bits; bits >>= 1
-                            ) {
-                               _debug("- l=%d n=%d b=%lx", loop, nbit, bits);
-                               if (bits & 1) {
-                                       acks[nbit] = RXRPC_ACK_TYPE_ACK;
-                                       ack.nAcks = nbit + 1;
-                               }
-                               nbit++;
-                       }
-               }
-               call->ackr_reason = 0;
-               spin_unlock_bh(&call->lock);
-
-               pad = 0;
-
-               iov[1].iov_base = &ack;
-               iov[1].iov_len  = sizeof(ack);
-               iov[2].iov_base = acks;
-               iov[2].iov_len  = ack.nAcks;
-               iov[3].iov_base = &pad;
-               iov[3].iov_len  = 3;
-               iov[4].iov_base = &ackinfo;
-               iov[4].iov_len  = sizeof(ackinfo);
-
-               switch (ack.reason) {
-               case RXRPC_ACK_REQUESTED:
-               case RXRPC_ACK_DUPLICATE:
-               case RXRPC_ACK_OUT_OF_SEQUENCE:
-               case RXRPC_ACK_EXCEEDS_WINDOW:
-               case RXRPC_ACK_NOSPACE:
-               case RXRPC_ACK_PING:
-               case RXRPC_ACK_PING_RESPONSE:
-                       goto send_ACK_with_skew;
-               case RXRPC_ACK_DELAY:
-               case RXRPC_ACK_IDLE:
-                       goto send_ACK;
-               }
+               goto recheck_state;
        }
 
-       /* handle completion of security negotiations on an incoming
-        * connection */
-       if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) {
-               _debug("secured");
-               spin_lock_bh(&call->lock);
-
-               if (call->state == RXRPC_CALL_SERVER_SECURING) {
-                       _debug("securing");
-                       write_lock(&call->socket->call_lock);
-                       if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-                           !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-                               _debug("not released");
-                               call->state = RXRPC_CALL_SERVER_ACCEPTING;
-                               list_move_tail(&call->accept_link,
-                                              &call->socket->acceptq);
-                       }
-                       write_unlock(&call->socket->call_lock);
-                       read_lock(&call->state_lock);
-                       if (call->state < RXRPC_CALL_COMPLETE)
-                               set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
-                       read_unlock(&call->state_lock);
-               }
-
-               spin_unlock_bh(&call->lock);
-               if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events))
-                       goto maybe_reschedule;
-       }
-
-       /* post a notification of an acceptable connection to the app */
-       if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) {
-               _debug("post accept");
-               if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
-                                      0, false) < 0)
-                       goto no_mem;
-               clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
-               goto maybe_reschedule;
-       }
-
-       /* handle incoming call acceptance */
-       if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) {
-               _debug("accepted");
-               ASSERTCMP(call->rx_data_post, ==, 0);
-               call->rx_data_post = 1;
-               read_lock_bh(&call->state_lock);
-               if (call->state < RXRPC_CALL_COMPLETE)
-                       set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
-               read_unlock_bh(&call->state_lock);
-       }
-
-       /* drain the out of sequence received packet queue into the packet Rx
-        * queue */
-       if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) {
-               while (call->rx_data_post == call->rx_first_oos)
-                       if (rxrpc_drain_rx_oos_queue(call) < 0)
-                               break;
-               goto maybe_reschedule;
-       }
-
-       if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-               rxrpc_release_call(call);
-               clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
-       }
+       rxrpc_set_timer(call);
 
        /* other events may have been raised since we started checking */
-       goto maybe_reschedule;
-
-send_ACK_with_skew:
-       ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
-                           ntohl(ack.serial));
-send_ACK:
-       mtu = call->conn->params.peer->if_mtu;
-       mtu -= call->conn->params.peer->hdrsize;
-       ackinfo.maxMTU  = htonl(mtu);
-       ackinfo.rwind   = htonl(rxrpc_rx_window_size);
-
-       /* permit the peer to send us jumbo packets if it wants to */
-       ackinfo.rxMTU   = htonl(rxrpc_rx_mtu);
-       ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
-
-       serial = atomic_inc_return(&call->conn->serial);
-       whdr.serial = htonl(serial);
-       _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-              serial,
-              ntohs(ack.maxSkew),
-              ntohl(ack.firstPacket),
-              ntohl(ack.previousPacket),
-              ntohl(ack.serial),
-              rxrpc_acks(ack.reason),
-              ack.nAcks);
-
-       del_timer_sync(&call->ack_timer);
-       if (ack.nAcks > 0)
-               set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
-       goto send_message_2;
-
-send_message:
-       _debug("send message");
-
-       serial = atomic_inc_return(&call->conn->serial);
-       whdr.serial = htonl(serial);
-       _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial);
-send_message_2:
-
-       len = iov[0].iov_len;
-       ioc = 1;
-       if (iov[4].iov_len) {
-               ioc = 5;
-               len += iov[4].iov_len;
-               len += iov[3].iov_len;
-               len += iov[2].iov_len;
-               len += iov[1].iov_len;
-       } else if (iov[3].iov_len) {
-               ioc = 4;
-               len += iov[3].iov_len;
-               len += iov[2].iov_len;
-               len += iov[1].iov_len;
-       } else if (iov[2].iov_len) {
-               ioc = 3;
-               len += iov[2].iov_len;
-               len += iov[1].iov_len;
-       } else if (iov[1].iov_len) {
-               ioc = 2;
-               len += iov[1].iov_len;
-       }
-
-       ret = kernel_sendmsg(call->conn->params.local->socket,
-                            &msg, iov, ioc, len);
-       if (ret < 0) {
-               _debug("sendmsg failed: %d", ret);
-               read_lock_bh(&call->state_lock);
-               if (call->state < RXRPC_CALL_DEAD)
-                       rxrpc_queue_call(call);
-               read_unlock_bh(&call->state_lock);
-               goto error;
-       }
-
-       switch (genbit) {
-       case RXRPC_CALL_EV_ABORT:
-               clear_bit(genbit, &call->events);
-               clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-               goto kill_ACKs;
-
-       case RXRPC_CALL_EV_ACK_FINAL:
-               write_lock_bh(&call->state_lock);
-               if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
-                       call->state = RXRPC_CALL_COMPLETE;
-               write_unlock_bh(&call->state_lock);
-               goto kill_ACKs;
-
-       default:
-               clear_bit(genbit, &call->events);
-               switch (call->state) {
-               case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-               case RXRPC_CALL_CLIENT_RECV_REPLY:
-               case RXRPC_CALL_SERVER_RECV_REQUEST:
-               case RXRPC_CALL_SERVER_ACK_REQUEST:
-                       _debug("start ACK timer");
-                       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
-                                         call->ackr_serial, false);
-               default:
-                       break;
-               }
-               goto maybe_reschedule;
-       }
-
-kill_ACKs:
-       del_timer_sync(&call->ack_timer);
-       if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events))
-               rxrpc_put_call(call);
-       clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-
-maybe_reschedule:
-       if (call->events || !skb_queue_empty(&call->rx_queue)) {
-               read_lock_bh(&call->state_lock);
-               if (call->state < RXRPC_CALL_DEAD)
-                       rxrpc_queue_call(call);
-               read_unlock_bh(&call->state_lock);
-       }
-
-       /* don't leave aborted connections on the accept queue */
-       if (call->state >= RXRPC_CALL_COMPLETE &&
-           !list_empty(&call->accept_link)) {
-               _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
-                      call, call->events, call->flags, call->conn->proto.cid);
-
-               read_lock_bh(&call->state_lock);
-               if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-                   !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-                       rxrpc_queue_call(call);
-               read_unlock_bh(&call->state_lock);
-       }
-
-error:
-       clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
-       kfree(acks);
-
-       /* because we don't want two CPUs both processing the work item for one
-        * call at the same time, we use a flag to note when it's busy; however
-        * this means there's a race between clearing the flag and setting the
-        * work pending bit and the work item being processed again */
-       if (call->events && !work_pending(&call->processor)) {
-               _debug("jumpstart %x", call->conn->proto.cid);
-               rxrpc_queue_call(call);
+       if (call->events && call->state < RXRPC_CALL_COMPLETE) {
+               __rxrpc_queue_call(call);
+               goto out;
        }
 
+out_put:
+       rxrpc_put_call(call, rxrpc_call_put);
+out:
        _leave("");
-       return;
-
-no_mem:
-       _debug("out of memory");
-       goto maybe_reschedule;
 }
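
The reworked rxrpc_process_call() above has the usual workqueue-handler shape: recover the owning object with container_of(), act, and loop back to recheck_state so that events raised in the meantime are handled before the ref is dropped. A minimal sketch of that skeleton (names are illustrative):

/* Sketch of the work-handler pattern used by rxrpc_process_call(). */
struct my_call {
	struct work_struct	processor;
	unsigned long		events;
};

static void my_call_processor(struct work_struct *work)
{
	struct my_call *call =
		container_of(work, struct my_call, processor);

	/* Handle pending events, re-queueing if more were raised while
	 * we were busy so none are lost.
	 */
}
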
index ae057e0..22f9b0d 100644
  */
 unsigned int rxrpc_max_call_lifetime = 60 * HZ;
 
-/*
- * Time till dead call expires after last use (in jiffies).
- */
-unsigned int rxrpc_dead_call_expiry = 2 * HZ;
-
 const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
-       [RXRPC_CALL_UNINITIALISED]              = "Uninit",
+       [RXRPC_CALL_UNINITIALISED]              = "Uninit  ",
        [RXRPC_CALL_CLIENT_AWAIT_CONN]          = "ClWtConn",
        [RXRPC_CALL_CLIENT_SEND_REQUEST]        = "ClSndReq",
        [RXRPC_CALL_CLIENT_AWAIT_REPLY]         = "ClAwtRpl",
        [RXRPC_CALL_CLIENT_RECV_REPLY]          = "ClRcvRpl",
-       [RXRPC_CALL_CLIENT_FINAL_ACK]           = "ClFnlACK",
+       [RXRPC_CALL_SERVER_PREALLOC]            = "SvPrealc",
        [RXRPC_CALL_SERVER_SECURING]            = "SvSecure",
        [RXRPC_CALL_SERVER_ACCEPTING]           = "SvAccept",
        [RXRPC_CALL_SERVER_RECV_REQUEST]        = "SvRcvReq",
@@ -43,22 +38,44 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
        [RXRPC_CALL_SERVER_SEND_REPLY]          = "SvSndRpl",
        [RXRPC_CALL_SERVER_AWAIT_ACK]           = "SvAwtACK",
        [RXRPC_CALL_COMPLETE]                   = "Complete",
-       [RXRPC_CALL_SERVER_BUSY]                = "SvBusy  ",
+};
+
+const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
+       [RXRPC_CALL_SUCCEEDED]                  = "Complete",
        [RXRPC_CALL_REMOTELY_ABORTED]           = "RmtAbort",
        [RXRPC_CALL_LOCALLY_ABORTED]            = "LocAbort",
+       [RXRPC_CALL_LOCAL_ERROR]                = "LocError",
        [RXRPC_CALL_NETWORK_ERROR]              = "NetError",
-       [RXRPC_CALL_DEAD]                       = "Dead    ",
+};
+
+const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = {
+       [rxrpc_call_new_client]         = "NWc",
+       [rxrpc_call_new_service]        = "NWs",
+       [rxrpc_call_queued]             = "QUE",
+       [rxrpc_call_queued_ref]         = "QUR",
+       [rxrpc_call_seen]               = "SEE",
+       [rxrpc_call_got]                = "GOT",
+       [rxrpc_call_got_userid]         = "Gus",
+       [rxrpc_call_got_kernel]         = "Gke",
+       [rxrpc_call_put]                = "PUT",
+       [rxrpc_call_put_userid]         = "Pus",
+       [rxrpc_call_put_kernel]         = "Pke",
+       [rxrpc_call_put_noqueue]        = "PNQ",
 };
 
 struct kmem_cache *rxrpc_call_jar;
 LIST_HEAD(rxrpc_calls);
 DEFINE_RWLOCK(rxrpc_call_lock);
 
-static void rxrpc_destroy_call(struct work_struct *work);
-static void rxrpc_call_life_expired(unsigned long _call);
-static void rxrpc_dead_call_expired(unsigned long _call);
-static void rxrpc_ack_time_expired(unsigned long _call);
-static void rxrpc_resend_time_expired(unsigned long _call);
+static void rxrpc_call_timer_expired(unsigned long _call)
+{
+       struct rxrpc_call *call = (struct rxrpc_call *)_call;
+
+       _enter("%d", call->debug_id);
+
+       if (call->state < RXRPC_CALL_COMPLETE)
+               rxrpc_queue_call(call);
+}
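
rxrpc_call_timer_expired() uses the timer API of this kernel generation: setup_timer() stores the object pointer as an unsigned long and the callback casts it back. Timers fire in softirq context, so the callback only queues the work item rather than doing the work itself. A sketch of the pairing (struct and names are illustrative):

/* Sketch of the pre-timer_setup() timer idiom used above. */
struct my_call {
	struct timer_list	timer;
};

static void my_timer_expired(unsigned long data)
{
	struct my_call *call = (struct my_call *)data;

	/* Softirq context: just kick the deferred work for this call. */
}

static void my_call_init(struct my_call *call)
{
	/* The pointer is smuggled through the unsigned long data arg. */
	setup_timer(&call->timer, my_timer_expired, (unsigned long)call);
}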
 
 /*
  * find an extant server call
@@ -91,7 +108,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
        return NULL;
 
 found_extant_call:
-       rxrpc_get_call(call);
+       rxrpc_get_call(call, rxrpc_call_got);
        read_unlock(&rx->call_lock);
        _leave(" = %p [%d]", call, atomic_read(&call->usage));
        return call;
@@ -100,7 +117,7 @@ found_extant_call:
 /*
  * allocate a new call
  */
-static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 {
        struct rxrpc_call *call;
 
@@ -108,29 +125,25 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
        if (!call)
                return NULL;
 
-       call->acks_winsz = 16;
-       call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+       call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE,
+                                   sizeof(struct sk_buff *),
                                    gfp);
-       if (!call->acks_window) {
-               kmem_cache_free(rxrpc_call_jar, call);
-               return NULL;
-       }
+       if (!call->rxtx_buffer)
+               goto nomem;
 
-       setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
-                   (unsigned long) call);
-       setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
-                   (unsigned long) call);
-       setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
-                   (unsigned long) call);
-       setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
-                   (unsigned long) call);
-       INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+       call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp);
+       if (!call->rxtx_annotations)
+               goto nomem_2;
+
+       setup_timer(&call->timer, rxrpc_call_timer_expired,
+                   (unsigned long)call);
        INIT_WORK(&call->processor, &rxrpc_process_call);
        INIT_LIST_HEAD(&call->link);
+       INIT_LIST_HEAD(&call->chan_wait_link);
        INIT_LIST_HEAD(&call->accept_link);
-       skb_queue_head_init(&call->rx_queue);
-       skb_queue_head_init(&call->rx_oos_queue);
-       init_waitqueue_head(&call->tx_waitq);
+       INIT_LIST_HEAD(&call->recvmsg_link);
+       INIT_LIST_HEAD(&call->sock_link);
+       init_waitqueue_head(&call->waitq);
        spin_lock_init(&call->lock);
        rwlock_init(&call->state_lock);
        atomic_set(&call->usage, 1);
@@ -138,70 +151,52 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 
        memset(&call->sock_node, 0xed, sizeof(call->sock_node));
 
-       call->rx_data_expect = 1;
-       call->rx_data_eaten = 0;
-       call->rx_first_oos = 0;
-       call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
-       call->creation_jif = jiffies;
+       /* Leave space in the ring to handle a maxed-out jumbo packet */
+       call->rx_winsize = rxrpc_rx_window_size;
+       call->tx_winsize = 16;
+       call->rx_expect_next = 1;
        return call;
+
+nomem_2:
+       kfree(call->rxtx_buffer);
+nomem:
+       kmem_cache_free(rxrpc_call_jar, call);
+       return NULL;
 }
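
The reworked rxrpc_alloc_call() unwinds its two allocations with the kernel's usual goto ladder, freeing in reverse order of allocation so each label cleans up exactly what succeeded before it. A condensed sketch of the pattern (types are illustrative):

struct thing {
	void	**bufs;
	u8	*annos;
};

static struct thing *thing_alloc(size_t n, gfp_t gfp)
{
	struct thing *t = kzalloc(sizeof(*t), gfp);

	if (!t)
		return NULL;
	t->bufs = kcalloc(n, sizeof(*t->bufs), gfp);
	if (!t->bufs)
		goto nomem;
	t->annos = kcalloc(n, sizeof(*t->annos), gfp);
	if (!t->annos)
		goto nomem_2;
	return t;

nomem_2:
	kfree(t->bufs);
nomem:
	kfree(t);
	return NULL;
}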
 
 /*
  * Allocate a new client call.
  */
-static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
-                                                 struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
                                                  gfp_t gfp)
 {
        struct rxrpc_call *call;
 
        _enter("");
 
-       ASSERT(rx->local != NULL);
-
        call = rxrpc_alloc_call(gfp);
        if (!call)
                return ERR_PTR(-ENOMEM);
        call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
-
-       sock_hold(&rx->sk);
-       call->socket = rx;
-       call->rx_data_post = 1;
-
-       call->local = rx->local;
        call->service_id = srx->srx_service;
-       call->in_clientflag = 0;
 
        _leave(" = %p", call);
        return call;
 }
 
 /*
- * Begin client call.
+ * Initiate the call ack/resend/expiry timer.
  */
-static int rxrpc_begin_client_call(struct rxrpc_call *call,
-                                  struct rxrpc_conn_parameters *cp,
-                                  struct sockaddr_rxrpc *srx,
-                                  gfp_t gfp)
+static void rxrpc_start_call_timer(struct rxrpc_call *call)
 {
-       int ret;
-
-       /* Set up or get a connection record and set the protocol parameters,
-        * including channel number and call ID.
-        */
-       ret = rxrpc_connect_call(call, cp, srx, gfp);
-       if (ret < 0)
-               return ret;
-
-       call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
-
-       spin_lock(&call->conn->params.peer->lock);
-       hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
-       spin_unlock(&call->conn->params.peer->lock);
-
-       call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
-       add_timer(&call->lifetimer);
-       return 0;
+       unsigned long expire_at;
+
+       expire_at = jiffies + rxrpc_max_call_lifetime;
+       call->expire_at = expire_at;
+       call->ack_at = expire_at;
+       call->resend_at = expire_at;
+       call->timer.expires = expire_at;
+       add_timer(&call->timer);
 }
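
All three deadlines start out equal to the hard expiry time and are pulled forward as ACKs and resends are proposed. rxrpc_set_timer() itself is not part of this hunk; presumably it re-arms the single call timer for the earliest pending deadline, along these lines (an assumption, not the patch's code):

/* Assumed sketch of a rxrpc_set_timer()-style helper: arm the one call
 * timer for whichever of the three deadlines comes first.
 */
static void set_call_timer(struct rxrpc_call *call)
{
	unsigned long t = call->expire_at;

	if (time_before(call->ack_at, t))
		t = call->ack_at;
	if (time_before(call->resend_at, t))
		t = call->resend_at;
	if (call->timer.expires != t)
		mod_timer(&call->timer, t);
}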
 
 /*
@@ -216,16 +211,20 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 {
        struct rxrpc_call *call, *xcall;
        struct rb_node *parent, **pp;
+       const void *here = __builtin_return_address(0);
        int ret;
 
        _enter("%p,%lx", rx, user_call_ID);
 
-       call = rxrpc_alloc_client_call(rx, srx, gfp);
+       call = rxrpc_alloc_client_call(srx, gfp);
        if (IS_ERR(call)) {
                _leave(" = %ld", PTR_ERR(call));
                return call;
        }
 
+       trace_rxrpc_call(call, 0, atomic_read(&call->usage), here,
+                        (const void *)user_call_ID);
+
        /* Publish the call, even though it is incompletely set up as yet */
        call->user_call_ID = user_call_ID;
        __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
@@ -246,20 +245,32 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
                        goto found_user_ID_now_present;
        }
 
-       rxrpc_get_call(call);
-
+       rcu_assign_pointer(call->socket, rx);
+       rxrpc_get_call(call, rxrpc_call_got_userid);
        rb_link_node(&call->sock_node, parent, pp);
        rb_insert_color(&call->sock_node, &rx->calls);
+       list_add(&call->sock_link, &rx->sock_calls);
+
        write_unlock(&rx->call_lock);
 
-       write_lock_bh(&rxrpc_call_lock);
+       write_lock(&rxrpc_call_lock);
        list_add_tail(&call->link, &rxrpc_calls);
-       write_unlock_bh(&rxrpc_call_lock);
+       write_unlock(&rxrpc_call_lock);
 
-       ret = rxrpc_begin_client_call(call, cp, srx, gfp);
+       /* Set up or get a connection record and set the protocol parameters,
+        * including channel number and call ID.
+        */
+       ret = rxrpc_connect_call(call, cp, srx, gfp);
        if (ret < 0)
                goto error;
 
+       spin_lock_bh(&call->conn->params.peer->lock);
+       hlist_add_head(&call->error_link,
+                      &call->conn->params.peer->error_targets);
+       spin_unlock_bh(&call->conn->params.peer->lock);
+
+       rxrpc_start_call_timer(call);
+
        _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
 
        _leave(" = %p [new]", call);
@@ -269,15 +280,17 @@ error:
        write_lock(&rx->call_lock);
        rb_erase(&call->sock_node, &rx->calls);
        write_unlock(&rx->call_lock);
-       rxrpc_put_call(call);
+       rxrpc_put_call(call, rxrpc_call_put_userid);
 
-       write_lock_bh(&rxrpc_call_lock);
+       write_lock(&rxrpc_call_lock);
        list_del_init(&call->link);
-       write_unlock_bh(&rxrpc_call_lock);
+       write_unlock(&rxrpc_call_lock);
 
+error_out:
+       __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+                                   RX_CALL_DEAD, ret);
        set_bit(RXRPC_CALL_RELEASED, &call->flags);
-       call->state = RXRPC_CALL_DEAD;
-       rxrpc_put_call(call);
+       rxrpc_put_call(call, rxrpc_call_put);
        _leave(" = %d", ret);
        return ERR_PTR(ret);
 
@@ -288,324 +301,217 @@ error:
         */
 found_user_ID_now_present:
        write_unlock(&rx->call_lock);
-       set_bit(RXRPC_CALL_RELEASED, &call->flags);
-       call->state = RXRPC_CALL_DEAD;
-       rxrpc_put_call(call);
-       _leave(" = -EEXIST [%p]", call);
-       return ERR_PTR(-EEXIST);
+       ret = -EEXIST;
+       goto error_out;
 }
 
 /*
- * set up an incoming call
- * - called in process context with IRQs enabled
+ * Set up an incoming call.  call->conn points to the connection.
+ * This is called in BH context and isn't allowed to fail.
  */
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
-                                      struct rxrpc_connection *conn,
-                                      struct sk_buff *skb)
+void rxrpc_incoming_call(struct rxrpc_sock *rx,
+                        struct rxrpc_call *call,
+                        struct sk_buff *skb)
 {
+       struct rxrpc_connection *conn = call->conn;
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct rxrpc_call *call, *candidate;
-       u32 call_id, chan;
-
-       _enter(",%d", conn->debug_id);
-
-       ASSERT(rx != NULL);
-
-       candidate = rxrpc_alloc_call(GFP_NOIO);
-       if (!candidate)
-               return ERR_PTR(-EBUSY);
-
-       chan = sp->hdr.cid & RXRPC_CHANNELMASK;
-       candidate->socket       = rx;
-       candidate->conn         = conn;
-       candidate->cid          = sp->hdr.cid;
-       candidate->call_id      = sp->hdr.callNumber;
-       candidate->channel      = chan;
-       candidate->rx_data_post = 0;
-       candidate->state        = RXRPC_CALL_SERVER_ACCEPTING;
-       if (conn->security_ix > 0)
-               candidate->state = RXRPC_CALL_SERVER_SECURING;
-
-       spin_lock(&conn->channel_lock);
-
-       /* set the channel for this call */
-       call = rcu_dereference_protected(conn->channels[chan].call,
-                                        lockdep_is_held(&conn->channel_lock));
-
-       _debug("channel[%u] is %p", candidate->channel, call);
-       if (call && call->call_id == sp->hdr.callNumber) {
-               /* already set; must've been a duplicate packet */
-               _debug("extant call [%d]", call->state);
-               ASSERTCMP(call->conn, ==, conn);
-
-               read_lock(&call->state_lock);
-               switch (call->state) {
-               case RXRPC_CALL_LOCALLY_ABORTED:
-                       if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
-                               rxrpc_queue_call(call);
-               case RXRPC_CALL_REMOTELY_ABORTED:
-                       read_unlock(&call->state_lock);
-                       goto aborted_call;
-               default:
-                       rxrpc_get_call(call);
-                       read_unlock(&call->state_lock);
-                       goto extant_call;
-               }
-       }
-
-       if (call) {
-               /* it seems the channel is still in use from the previous call
-                * - ditch the old binding if its call is now complete */
-               _debug("CALL: %u { %s }",
-                      call->debug_id, rxrpc_call_states[call->state]);
-
-               if (call->state >= RXRPC_CALL_COMPLETE) {
-                       __rxrpc_disconnect_call(call);
-               } else {
-                       spin_unlock(&conn->channel_lock);
-                       kmem_cache_free(rxrpc_call_jar, candidate);
-                       _leave(" = -EBUSY");
-                       return ERR_PTR(-EBUSY);
-               }
-       }
-
-       /* check the call number isn't duplicate */
-       _debug("check dup");
-       call_id = sp->hdr.callNumber;
-
-       /* We just ignore calls prior to the current call ID.  Terminated calls
-        * are handled via the connection.
+       u32 chan;
+
+       _enter(",%d", call->conn->debug_id);
+
+       rcu_assign_pointer(call->socket, rx);
+       call->call_id           = sp->hdr.callNumber;
+       call->service_id        = sp->hdr.serviceId;
+       call->cid               = sp->hdr.cid;
+       call->state             = RXRPC_CALL_SERVER_ACCEPTING;
+       if (sp->hdr.securityIndex > 0)
+               call->state     = RXRPC_CALL_SERVER_SECURING;
+
+       /* Set the channel for this call.  We don't get channel_lock as we're
+        * only defending against the data_ready handler (which we're called
+        * from) and the RESPONSE packet parser (which is only really
+        * interested in call_counter and can cope with a disagreement with the
+        * call pointer).
         */
-       if (call_id <= conn->channels[chan].call_counter)
-               goto old_call; /* TODO: Just drop packet */
-
-       /* make the call available */
-       _debug("new call");
-       call = candidate;
-       candidate = NULL;
-       conn->channels[chan].call_counter = call_id;
+       chan = sp->hdr.cid & RXRPC_CHANNELMASK;
+       conn->channels[chan].call_counter = call->call_id;
+       conn->channels[chan].call_id = call->call_id;
        rcu_assign_pointer(conn->channels[chan].call, call);
-       sock_hold(&rx->sk);
-       rxrpc_get_connection(conn);
-       spin_unlock(&conn->channel_lock);
 
        spin_lock(&conn->params.peer->lock);
        hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
        spin_unlock(&conn->params.peer->lock);
 
-       write_lock_bh(&rxrpc_call_lock);
-       list_add_tail(&call->link, &rxrpc_calls);
-       write_unlock_bh(&rxrpc_call_lock);
+       _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
 
-       call->local = conn->params.local;
-       call->epoch = conn->proto.epoch;
-       call->service_id = conn->params.service_id;
-       call->in_clientflag = RXRPC_CLIENT_INITIATED;
+       rxrpc_start_call_timer(call);
+       _leave("");
+}
 
-       _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+/*
+ * Queue a call's work processor, getting a ref to pass to the work queue.
+ */
+bool rxrpc_queue_call(struct rxrpc_call *call)
+{
+       const void *here = __builtin_return_address(0);
+       int n = __atomic_add_unless(&call->usage, 1, 0);
+       if (n == 0)
+               return false;
+       if (rxrpc_queue_work(&call->processor))
+               trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+       else
+               rxrpc_put_call(call, rxrpc_call_put_noqueue);
+       return true;
+}
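
rxrpc_queue_call() only queues the processor if it can still take a reference: __atomic_add_unless() increments the usage count unless it is already zero and returns the old value, so a zero return means the call is already being destroyed and must not be resurrected. In sketch form:

/* "Get a ref unless already dead": __atomic_add_unless(v, a, u) does
 * *v += a unless *v == u and returns the old value.
 */
static bool call_get_ref_unless_dead(struct rxrpc_call *call)
{
	int old = __atomic_add_unless(&call->usage, 1, 0);

	if (old == 0)
		return false;	/* usage already zero: call is going away */
	return true;		/* we hold a ref for the work item to consume */
}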
 
-       call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
-       add_timer(&call->lifetimer);
-       _leave(" = %p {%d} [new]", call, call->debug_id);
-       return call;
+/*
+ * Queue a call's work processor, passing the caller's ref to the work queue.
+ */
+bool __rxrpc_queue_call(struct rxrpc_call *call)
+{
+       const void *here = __builtin_return_address(0);
+       int n = atomic_read(&call->usage);
+       ASSERTCMP(n, >=, 1);
+       if (rxrpc_queue_work(&call->processor))
+               trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+       else
+               rxrpc_put_call(call, rxrpc_call_put_noqueue);
+       return true;
+}
 
-extant_call:
-       spin_unlock(&conn->channel_lock);
-       kmem_cache_free(rxrpc_call_jar, candidate);
-       _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
-       return call;
+/*
+ * Note the re-emergence of a call.
+ */
+void rxrpc_see_call(struct rxrpc_call *call)
+{
+       const void *here = __builtin_return_address(0);
+       if (call) {
+               int n = atomic_read(&call->usage);
 
-aborted_call:
-       spin_unlock(&conn->channel_lock);
-       kmem_cache_free(rxrpc_call_jar, candidate);
-       _leave(" = -ECONNABORTED");
-       return ERR_PTR(-ECONNABORTED);
-
-old_call:
-       spin_unlock(&conn->channel_lock);
-       kmem_cache_free(rxrpc_call_jar, candidate);
-       _leave(" = -ECONNRESET [old]");
-       return ERR_PTR(-ECONNRESET);
+               trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+       }
 }
 
 /*
- * detach a call from a socket and set up for release
+ * Note the addition of a ref on a call.
  */
-void rxrpc_release_call(struct rxrpc_call *call)
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+       const void *here = __builtin_return_address(0);
+       int n = atomic_inc_return(&call->usage);
+
+       trace_rxrpc_call(call, op, n, here, NULL);
+}
+
+/*
+ * Detach a call from its owning socket.
+ */
+void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 {
        struct rxrpc_connection *conn = call->conn;
-       struct rxrpc_sock *rx = call->socket;
+       bool put = false;
+       int i;
+
+       _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
 
-       _enter("{%d,%d,%d,%d}",
-              call->debug_id, atomic_read(&call->usage),
-              atomic_read(&call->ackr_not_idle),
-              call->rx_first_oos);
+       ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+
+       rxrpc_see_call(call);
 
        spin_lock_bh(&call->lock);
        if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
                BUG();
        spin_unlock_bh(&call->lock);
 
-       /* dissociate from the socket
-        * - the socket's ref on the call is passed to the death timer
-        */
-       _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+       del_timer_sync(&call->timer);
 
-       spin_lock(&conn->params.peer->lock);
-       hlist_del_init(&call->error_link);
-       spin_unlock(&conn->params.peer->lock);
+       /* Make sure we don't get any more notifications */
+       write_lock_bh(&rx->recvmsg_lock);
 
-       write_lock_bh(&rx->call_lock);
-       if (!list_empty(&call->accept_link)) {
+       if (!list_empty(&call->recvmsg_link)) {
                _debug("unlinking once-pending call %p { e=%lx f=%lx }",
                       call, call->events, call->flags);
-               ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
-               list_del_init(&call->accept_link);
-               sk_acceptq_removed(&rx->sk);
-       } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
-               rb_erase(&call->sock_node, &rx->calls);
-               memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
-               clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+               list_del(&call->recvmsg_link);
+               put = true;
        }
-       write_unlock_bh(&rx->call_lock);
 
-       /* free up the channel for reuse */
-       write_lock_bh(&call->state_lock);
+       /* list_empty() must return false in rxrpc_notify_socket() */
+       call->recvmsg_link.next = NULL;
+       call->recvmsg_link.prev = NULL;
 
-       if (call->state < RXRPC_CALL_COMPLETE &&
-           call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
-               _debug("+++ ABORTING STATE %d +++\n", call->state);
-               call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               call->local_abort = RX_CALL_DEAD;
-       }
-       write_unlock_bh(&call->state_lock);
+       write_unlock_bh(&rx->recvmsg_lock);
+       if (put)
+               rxrpc_put_call(call, rxrpc_call_put);
 
-       rxrpc_disconnect_call(call);
+       write_lock(&rx->call_lock);
 
-       /* clean up the Rx queue */
-       if (!skb_queue_empty(&call->rx_queue) ||
-           !skb_queue_empty(&call->rx_oos_queue)) {
-               struct rxrpc_skb_priv *sp;
-               struct sk_buff *skb;
+       if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+               rb_erase(&call->sock_node, &rx->calls);
+               memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+               rxrpc_put_call(call, rxrpc_call_put_userid);
+       }
 
-               _debug("purge Rx queues");
+       list_del(&call->sock_link);
+       write_unlock(&rx->call_lock);
 
-               spin_lock_bh(&call->lock);
-               while ((skb = skb_dequeue(&call->rx_queue)) ||
-                      (skb = skb_dequeue(&call->rx_oos_queue))) {
-                       spin_unlock_bh(&call->lock);
+       _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
-                       sp = rxrpc_skb(skb);
-                       _debug("- zap %s %%%u #%u",
-                              rxrpc_pkts[sp->hdr.type],
-                              sp->hdr.serial, sp->hdr.seq);
-                       rxrpc_free_skb(skb);
-                       spin_lock_bh(&call->lock);
-               }
-               spin_unlock_bh(&call->lock);
+       if (conn)
+               rxrpc_disconnect_call(call);
 
-               ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+       for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+               rxrpc_free_skb(call->rxtx_buffer[i]);
+               call->rxtx_buffer[i] = NULL;
        }
 
-       del_timer_sync(&call->resend_timer);
-       del_timer_sync(&call->ack_timer);
-       del_timer_sync(&call->lifetimer);
-       call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
-       add_timer(&call->deadspan);
-
        _leave("");
 }
 
-/*
- * handle a dead call being ready for reaping
- */
-static void rxrpc_dead_call_expired(unsigned long _call)
-{
-       struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-       _enter("{%d}", call->debug_id);
-
-       write_lock_bh(&call->state_lock);
-       call->state = RXRPC_CALL_DEAD;
-       write_unlock_bh(&call->state_lock);
-       rxrpc_put_call(call);
-}
-
-/*
- * mark a call as to be released, aborting it if it's still in progress
- * - called with softirqs disabled
- */
-static void rxrpc_mark_call_released(struct rxrpc_call *call)
-{
-       bool sched;
-
-       write_lock(&call->state_lock);
-       if (call->state < RXRPC_CALL_DEAD) {
-               sched = false;
-               if (call->state < RXRPC_CALL_COMPLETE) {
-                       _debug("abort call %p", call);
-                       call->state = RXRPC_CALL_LOCALLY_ABORTED;
-                       call->local_abort = RX_CALL_DEAD;
-                       if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
-                               sched = true;
-               }
-               if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-                       sched = true;
-               if (sched)
-                       rxrpc_queue_call(call);
-       }
-       write_unlock(&call->state_lock);
-}
-
 /*
  * release all the calls associated with a socket
  */
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 {
        struct rxrpc_call *call;
-       struct rb_node *p;
 
        _enter("%p", rx);
 
-       read_lock_bh(&rx->call_lock);
-
-       /* mark all the calls as no longer wanting incoming packets */
-       for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
-               call = rb_entry(p, struct rxrpc_call, sock_node);
-               rxrpc_mark_call_released(call);
-       }
-
-       /* kill the not-yet-accepted incoming calls */
-       list_for_each_entry(call, &rx->secureq, accept_link) {
-               rxrpc_mark_call_released(call);
+       while (!list_empty(&rx->sock_calls)) {
+               call = list_entry(rx->sock_calls.next,
+                                 struct rxrpc_call, sock_link);
+               rxrpc_get_call(call, rxrpc_call_got);
+               rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+               rxrpc_release_call(rx, call);
+               rxrpc_put_call(call, rxrpc_call_put);
        }
 
-       list_for_each_entry(call, &rx->acceptq, accept_link) {
-               rxrpc_mark_call_released(call);
-       }
-
-       read_unlock_bh(&rx->call_lock);
        _leave("");
 }
 
 /*
  * release a call
  */
-void __rxrpc_put_call(struct rxrpc_call *call)
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 {
+       const void *here = __builtin_return_address(0);
+       int n;
+
        ASSERT(call != NULL);
 
-       _enter("%p{u=%d}", call, atomic_read(&call->usage));
+       n = atomic_dec_return(&call->usage);
+       trace_rxrpc_call(call, op, n, here, NULL);
+       ASSERTCMP(n, >=, 0);
+       if (n == 0) {
+               _debug("call %d dead", call->debug_id);
+               ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 
-       ASSERTCMP(atomic_read(&call->usage), >, 0);
+               write_lock(&rxrpc_call_lock);
+               list_del_init(&call->link);
+               write_unlock(&rxrpc_call_lock);
 
-       if (atomic_dec_and_test(&call->usage)) {
-               _debug("call %d dead", call->debug_id);
-               WARN_ON(atomic_read(&call->skb_count) != 0);
-               ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-               rxrpc_queue_work(&call->destroyer);
+               rxrpc_cleanup_call(call);
        }
-       _leave("");
 }
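
The three helpers above implement traced reference counting: every get, put, and mere sighting of a call is logged together with the caller's address, so leaked or over-put refs can be chased down from the trace buffer. A minimal userspace sketch of the same pattern using C11 atomics (names here are illustrative, not rxrpc's):

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct call {
		atomic_int usage;
	};

	/* Log a usage-count transition together with the call site. */
	static void trace_call(struct call *c, const char *op, int n,
			       const void *where)
	{
		fprintf(stderr, "call %p: %s -> %d from %p\n",
			(void *)c, op, n, (void *)where);
	}

	static void call_get(struct call *c)
	{
		int n = atomic_fetch_add(&c->usage, 1) + 1;
		trace_call(c, "get", n, __builtin_return_address(0));
	}

	static void call_put(struct call *c)
	{
		int n = atomic_fetch_sub(&c->usage, 1) - 1;
		trace_call(c, "put", n, __builtin_return_address(0));
		if (n == 0)
			free(c);	/* last ref gone: destroy */
	}

	/* "see" notes that a pointer was observed without taking a ref. */
	static void call_see(struct call *c)
	{
		if (c)
			trace_call(c, "see", atomic_load(&c->usage),
				   __builtin_return_address(0));
	}

	int main(void)
	{
		struct call *c = malloc(sizeof(*c));

		atomic_init(&c->usage, 1);
		call_get(c);
		call_see(c);
		call_put(c);
		call_put(c);	/* drops to zero and frees */
		return 0;
	}
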
 
 /*
@@ -615,187 +521,68 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
 {
        struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
 
-       rxrpc_purge_queue(&call->rx_queue);
+       rxrpc_put_peer(call->peer);
+       kfree(call->rxtx_buffer);
+       kfree(call->rxtx_annotations);
        kmem_cache_free(rxrpc_call_jar, call);
 }
 
 /*
  * clean up a call
  */
-static void rxrpc_cleanup_call(struct rxrpc_call *call)
+void rxrpc_cleanup_call(struct rxrpc_call *call)
 {
-       _net("DESTROY CALL %d", call->debug_id);
+       int i;
 
-       ASSERT(call->socket);
+       _net("DESTROY CALL %d", call->debug_id);
 
        memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
 
-       del_timer_sync(&call->lifetimer);
-       del_timer_sync(&call->deadspan);
-       del_timer_sync(&call->ack_timer);
-       del_timer_sync(&call->resend_timer);
+       del_timer_sync(&call->timer);
 
+       ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
        ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
-       ASSERTCMP(call->events, ==, 0);
-       if (work_pending(&call->processor)) {
-               _debug("defer destroy");
-               rxrpc_queue_work(&call->destroyer);
-               return;
-       }
-
        ASSERTCMP(call->conn, ==, NULL);
 
-       if (call->acks_window) {
-               _debug("kill Tx window %d",
-                      CIRC_CNT(call->acks_head, call->acks_tail,
-                               call->acks_winsz));
-               smp_mb();
-               while (CIRC_CNT(call->acks_head, call->acks_tail,
-                               call->acks_winsz) > 0) {
-                       struct rxrpc_skb_priv *sp;
-                       unsigned long _skb;
-
-                       _skb = call->acks_window[call->acks_tail] & ~1;
-                       sp = rxrpc_skb((struct sk_buff *)_skb);
-                       _debug("+++ clear Tx %u", sp->hdr.seq);
-                       rxrpc_free_skb((struct sk_buff *)_skb);
-                       call->acks_tail =
-                               (call->acks_tail + 1) & (call->acks_winsz - 1);
-               }
-
-               kfree(call->acks_window);
-       }
+       /* Clean up the Rx/Tx buffer */
+       for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++)
+               rxrpc_free_skb(call->rxtx_buffer[i]);
 
        rxrpc_free_skb(call->tx_pending);
 
-       rxrpc_purge_queue(&call->rx_queue);
-       ASSERT(skb_queue_empty(&call->rx_oos_queue));
-       sock_put(&call->socket->sk);
        call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
 }
 
 /*
- * destroy a call
- */
-static void rxrpc_destroy_call(struct work_struct *work)
-{
-       struct rxrpc_call *call =
-               container_of(work, struct rxrpc_call, destroyer);
-
-       _enter("%p{%d,%d,%p}",
-              call, atomic_read(&call->usage), call->channel, call->conn);
-
-       ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-
-       write_lock_bh(&rxrpc_call_lock);
-       list_del_init(&call->link);
-       write_unlock_bh(&rxrpc_call_lock);
-
-       rxrpc_cleanup_call(call);
-       _leave("");
-}
-
-/*
- * preemptively destroy all the call records from a transport endpoint rather
- * than waiting for them to time out
+ * Make sure that all calls are gone.
  */
 void __exit rxrpc_destroy_all_calls(void)
 {
        struct rxrpc_call *call;
 
        _enter("");
-       write_lock_bh(&rxrpc_call_lock);
+
+       if (list_empty(&rxrpc_calls))
+               return;
+
+       write_lock(&rxrpc_call_lock);
 
        while (!list_empty(&rxrpc_calls)) {
                call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
                _debug("Zapping call %p", call);
 
+               rxrpc_see_call(call);
                list_del_init(&call->link);
 
-               switch (atomic_read(&call->usage)) {
-               case 0:
-                       ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-                       break;
-               case 1:
-                       if (del_timer_sync(&call->deadspan) != 0 &&
-                           call->state != RXRPC_CALL_DEAD)
-                               rxrpc_dead_call_expired((unsigned long) call);
-                       if (call->state != RXRPC_CALL_DEAD)
-                               break;
-               default:
-                       pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
-                              call, atomic_read(&call->usage),
-                              atomic_read(&call->ackr_not_idle),
-                              rxrpc_call_states[call->state],
-                              call->flags, call->events);
-                       if (!skb_queue_empty(&call->rx_queue))
-                               pr_err("Rx queue occupied\n");
-                       if (!skb_queue_empty(&call->rx_oos_queue))
-                               pr_err("OOS queue occupied\n");
-                       break;
-               }
-
-               write_unlock_bh(&rxrpc_call_lock);
-               cond_resched();
-               write_lock_bh(&rxrpc_call_lock);
-       }
-
-       write_unlock_bh(&rxrpc_call_lock);
-       _leave("");
-}
-
-/*
- * handle call lifetime being exceeded
- */
-static void rxrpc_call_life_expired(unsigned long _call)
-{
-       struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-       if (call->state >= RXRPC_CALL_COMPLETE)
-               return;
+               pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+                      call, atomic_read(&call->usage),
+                      rxrpc_call_states[call->state],
+                      call->flags, call->events);
 
-       _enter("{%d}", call->debug_id);
-       read_lock_bh(&call->state_lock);
-       if (call->state < RXRPC_CALL_COMPLETE) {
-               set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
-               rxrpc_queue_call(call);
+               write_unlock(&rxrpc_call_lock);
+               cond_resched();
+               write_lock(&rxrpc_call_lock);
        }
-       read_unlock_bh(&call->state_lock);
-}
 
-/*
- * handle resend timer expiry
- * - may not take call->state_lock as this can deadlock against del_timer_sync()
- */
-static void rxrpc_resend_time_expired(unsigned long _call)
-{
-       struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-       _enter("{%d}", call->debug_id);
-
-       if (call->state >= RXRPC_CALL_COMPLETE)
-               return;
-
-       clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-       if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-               rxrpc_queue_call(call);
-}
-
-/*
- * handle ACK timer expiry
- */
-static void rxrpc_ack_time_expired(unsigned long _call)
-{
-       struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-       _enter("{%d}", call->debug_id);
-
-       if (call->state >= RXRPC_CALL_COMPLETE)
-               return;
-
-       read_lock_bh(&call->state_lock);
-       if (call->state < RXRPC_CALL_COMPLETE &&
-           !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
-               rxrpc_queue_call(call);
-       read_unlock_bh(&call->state_lock);
+       write_unlock(&rxrpc_call_lock);
 }
index 9e91f27..9344a84 100644
@@ -7,6 +7,68 @@
  * modify it under the terms of the GNU General Public Licence
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * Client connections need to be cached for a little while after they've made a
+ * call so as to handle retransmitted DATA packets in case the server didn't
+ * receive the final ACK or terminating ABORT we sent it.
+ *
+ * Client connections can be in one of a number of cache states:
+ *
+ *  (1) INACTIVE - The connection is not held in any list and may not have been
+ *      exposed to the world.  If it has been previously exposed, it was
+ *      discarded from the idle list after expiring.
+ *
+ *  (2) WAITING - The connection is waiting for the number of client conns to
+ *      drop below the maximum capacity.  Calls may be in progress upon it from
+ *      when it was active and got culled.
+ *
+ *     The connection is on the rxrpc_waiting_client_conns list which is kept
+ *     in to-be-granted order.  Culled conns with waiters go to the back of
+ *     the queue just like new conns.
+ *
+ *  (3) ACTIVE - The connection has at least one call in progress upon it, it
+ *      may freely grant available channels to new calls and calls may be
+ *      waiting on it for channels to become available.
+ *
+ *     The connection is on the rxrpc_active_client_conns list which is kept
+ *     in activation order for culling purposes.
+ *
+ *     rxrpc_nr_active_client_conns is also incremented to account for it.
+ *
+ *  (4) CULLED - The connection got summarily culled to try and free up
+ *      capacity.  Calls currently in progress on the connection are allowed to
+ *      continue, but new calls will have to wait.  There can be no waiters in
+ *      this state - the conn would have to go to the WAITING state instead.
+ *
+ *  (5) IDLE - The connection has no calls in progress upon it and must have
+ *      been exposed to the world (ie. the EXPOSED flag must be set).  When it
+ *      expires, the EXPOSED flag is cleared and the connection transitions to
+ *      the INACTIVE state.
+ *
+ *     The connection is on the rxrpc_idle_client_conns list which is kept in
+ *     order of how soon they'll expire.
+ *
+ * There are flags of relevance to the cache:
+ *
+ *  (1) EXPOSED - The connection ID got exposed to the world.  If this flag is
+ *      set, an extra ref is added to the connection preventing it from being
+ *      reaped when it has no calls outstanding.  This flag is cleared and the
+ *      ref dropped when a conn is discarded from the idle list.
+ *
+ *      This allows us to move terminal call state retransmission to the
+ *      connection and to discard the call as soon as we think it is done
+ *      with.  It also gives us a chance to reuse the connection.
+ *
+ *  (2) DONT_REUSE - The connection should be discarded as soon as possible and
+ *      should not be reused.  This is set when an exclusive connection is used
+ *      or a call ID counter overflows.
+ *
+ * The caching state may only be changed if the cache lock is held.
+ *
+ * There are two idle client connection expiry durations.  If the total number
+ * of connections is below the reap threshold, we use the normal duration; if
+ * it's above, we use the fast duration.
  */
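
For quick reference, the states and flags described above could be modelled as below. This is a sketch only; the kernel's real definitions live elsewhere in the rxrpc headers and may differ in naming and values.

	/* Sketch of the client connection cache states described above. */
	enum client_conn_cache_state {
		CONN_CLIENT_INACTIVE,	/* on no list; maybe never exposed */
		CONN_CLIENT_WAITING,	/* on waiting list, wants capacity */
		CONN_CLIENT_ACTIVE,	/* on active list, granting channels */
		CONN_CLIENT_CULLED,	/* culled for capacity, no waiters */
		CONN_CLIENT_IDLE,	/* on idle list, exposed, will expire */
	};

	/* Flags of relevance to the cache. */
	#define CONN_F_EXPOSED		0x1	/* ID seen by peer; extra ref held */
	#define CONN_F_DONT_REUSE	0x2	/* discard as soon as possible */
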
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/timer.h>
 #include "ar-internal.h"
 
+__read_mostly unsigned int rxrpc_max_client_connections = 1000;
+__read_mostly unsigned int rxrpc_reap_client_connections = 900;
+__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+
+static unsigned int rxrpc_nr_client_conns;
+static unsigned int rxrpc_nr_active_client_conns;
+static __read_mostly bool rxrpc_kill_all_client_conns;
+
+static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
+static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
+static LIST_HEAD(rxrpc_waiting_client_conns);
+static LIST_HEAD(rxrpc_active_client_conns);
+static LIST_HEAD(rxrpc_idle_client_conns);
+
 /*
  * We use machine-unique IDs for our client connections.
  */
 DEFINE_IDR(rxrpc_client_conn_ids);
 static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
 
+static void rxrpc_cull_active_client_conns(void);
+static void rxrpc_discard_expired_client_conns(struct work_struct *);
+
+static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
+                           rxrpc_discard_expired_client_conns);
+
 /*
  * Get a connection ID and epoch for a client connection from the global pool.
  * The connection struct pointer is then recorded in the idr radix tree.  The
- * epoch is changed if this wraps.
- *
- * TODO: The IDR tree gets very expensive on memory if the connection IDs are
- * widely scattered throughout the number space, so we shall need to retire
- * connections that have, say, an ID more than four times the maximum number of
- * client conns away from the current allocation point to try and keep the IDs
- * concentrated.  We will also need to retire connections from an old epoch.
+ * epoch doesn't change until the client is rebooted (or, at least, unless the
+ * module is unloaded).
  */
 static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
                                          gfp_t gfp)
 {
-       u32 epoch;
        int id;
 
        _enter("");
@@ -44,34 +121,18 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
        idr_preload(gfp);
        spin_lock(&rxrpc_conn_id_lock);
 
-       epoch = rxrpc_epoch;
-
-       /* We could use idr_alloc_cyclic() here, but we really need to know
-        * when the thing wraps so that we can advance the epoch.
-        */
-       if (rxrpc_client_conn_ids.cur == 0)
-               rxrpc_client_conn_ids.cur = 1;
-       id = idr_alloc(&rxrpc_client_conn_ids, conn,
-                      rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT);
-       if (id < 0) {
-               if (id != -ENOSPC)
-                       goto error;
-               id = idr_alloc(&rxrpc_client_conn_ids, conn,
-                              1, 0x40000000, GFP_NOWAIT);
-               if (id < 0)
-                       goto error;
-               epoch++;
-               rxrpc_epoch = epoch;
-       }
-       rxrpc_client_conn_ids.cur = id + 1;
+       id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
+                             1, 0x40000000, GFP_NOWAIT);
+       if (id < 0)
+               goto error;
 
        spin_unlock(&rxrpc_conn_id_lock);
        idr_preload_end();
 
-       conn->proto.epoch = epoch;
+       conn->proto.epoch = rxrpc_epoch;
        conn->proto.cid = id << RXRPC_CIDSHIFT;
        set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
-       _leave(" [CID %x:%x]", epoch, conn->proto.cid);
+       _leave(" [CID %x]", conn->proto.cid);
        return 0;
 
 error:
@@ -114,8 +175,7 @@ void rxrpc_destroy_client_conn_ids(void)
 }
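
idr_alloc_cyclic() allocates from a cursor that advances past each ID it hands out, wrapping at the top of the range, so recently freed IDs are not immediately reissued to new connections. A toy userspace analogue of that behaviour (illustrative only, with a deliberately tiny ID space):

	#include <stdbool.h>
	#include <stdio.h>

	#define ID_MIN 1
	#define ID_MAX 16		/* tiny space for demonstration */

	static bool in_use[ID_MAX];
	static int cursor = ID_MIN;

	/* Hand out the next free ID at or after the cursor, wrapping once,
	 * so recently freed IDs are not immediately reissued. */
	static int alloc_cyclic(void)
	{
		for (int pass = 0; pass < 2; pass++) {
			for (int id = cursor; id < ID_MAX; id++) {
				if (!in_use[id]) {
					in_use[id] = true;
					cursor = id + 1;
					return id;
				}
			}
			cursor = ID_MIN;	/* wrap, retry from the bottom */
		}
		return -1;			/* -ENOSPC in the kernel */
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++)
			printf("%d ", alloc_cyclic());	/* 1 2 3 */
		in_use[1] = false;			/* free ID 1... */
		printf("\n%d\n", alloc_cyclic());	/* ...but 4 comes next */
		return 0;
	}
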
 
 /*
- * Allocate a client connection.  The caller must take care to clear any
- * padding bytes in *cp.
+ * Allocate a client connection.
  */
 static struct rxrpc_connection *
 rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
@@ -131,6 +191,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
                return ERR_PTR(-ENOMEM);
        }
 
+       atomic_set(&conn->usage, 1);
+       if (cp->exclusive)
+               __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+
        conn->params            = *cp;
        conn->out_clientflag    = RXRPC_CLIENT_INITIATED;
        conn->state             = RXRPC_CONN_CLIENT;
@@ -148,7 +212,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
                goto error_2;
 
        write_lock(&rxrpc_connection_lock);
-       list_add_tail(&conn->link, &rxrpc_connections);
+       list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
        write_unlock(&rxrpc_connection_lock);
 
        /* We steal the caller's peer ref. */
@@ -170,32 +234,68 @@ error_0:
 }
 
 /*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Determine if a connection may be reused.
  */
-int rxrpc_connect_call(struct rxrpc_call *call,
-                      struct rxrpc_conn_parameters *cp,
-                      struct sockaddr_rxrpc *srx,
-                      gfp_t gfp)
+static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
+{
+       int id_cursor, id, distance, limit;
+
+       if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
+               goto dont_reuse;
+
+       if (conn->proto.epoch != rxrpc_epoch)
+               goto mark_dont_reuse;
+
+       /* The IDR tree gets very expensive on memory if the connection IDs are
+        * widely scattered throughout the number space, so we shall want to
+        * kill off connections that, say, have an ID more than about four
+        * times the maximum number of client conns away from the current
+        * allocation point to try and keep the IDs concentrated.
+        */
+       id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+       id = conn->proto.cid >> RXRPC_CIDSHIFT;
+       distance = id - id_cursor;
+       if (distance < 0)
+               distance = -distance;
+       limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+       if (distance > limit)
+               goto mark_dont_reuse;
+
+       return true;
+
+mark_dont_reuse:
+       set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+dont_reuse:
+       return false;
+}
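
The reuse test above keeps the live ID space dense: a connection whose ID has drifted more than about four allocation windows away from the cursor is marked don't-reuse so the IDR tree stays compact. A worked sketch of the distance check, using assumed stand-in constants:

	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_CLIENT_CONNS	1000
	#define IDR_CHUNK		256	/* stand-in for the IDR's chunking */

	/* Round up to a multiple of the chunk size, as round_up() does. */
	static int round_up_to(int n, int chunk)
	{
		return ((n + chunk - 1) / chunk) * chunk;
	}

	static int may_reuse(int id, int id_cursor)
	{
		int distance = abs(id - id_cursor);
		int limit = round_up_to(MAX_CLIENT_CONNS, IDR_CHUNK) * 4;

		return distance <= limit;
	}

	int main(void)
	{
		/* limit = round_up(1000, 256) * 4 = 1024 * 4 = 4096 */
		printf("%d\n", may_reuse(5000, 2000));	/* distance 3000: reuse */
		printf("%d\n", may_reuse(9000, 2000));	/* distance 7000: retire */
		return 0;
	}
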
+
+/*
+ * Create or find a client connection to use for a call.
+ *
+ * If we return with a connection, the call will be on its waiting list.  It's
+ * left to the caller to assign a channel and wake up the call.
+ */
+static int rxrpc_get_client_conn(struct rxrpc_call *call,
+                                struct rxrpc_conn_parameters *cp,
+                                struct sockaddr_rxrpc *srx,
+                                gfp_t gfp)
 {
        struct rxrpc_connection *conn, *candidate = NULL;
        struct rxrpc_local *local = cp->local;
        struct rb_node *p, **pp, *parent;
        long diff;
-       int chan;
-
-       DECLARE_WAITQUEUE(myself, current);
+       int ret = -ENOMEM;
 
        _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
        cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
        if (!cp->peer)
-               return -ENOMEM;
+               goto error;
 
+       /* If the connection is not meant to be exclusive, search the available
+        * connections to see if the connection we want to use already exists.
+        */
        if (!cp->exclusive) {
-               /* Search for a existing client connection unless this is going
-                * to be a connection that's used exclusively for a single call.
-                */
                _debug("search 1");
                spin_lock(&local->client_conns_lock);
                p = local->client_conns.rb_node;
@@ -206,39 +306,56 @@ int rxrpc_connect_call(struct rxrpc_call *call,
                        diff = (cmp(peer) ?:
                                cmp(key) ?:
                                cmp(security_level));
-                       if (diff < 0)
+#undef cmp
+                       if (diff < 0) {
                                p = p->rb_left;
-                       else if (diff > 0)
+                       } else if (diff > 0) {
                                p = p->rb_right;
-                       else
-                               goto found_extant_conn;
+                       } else {
+                               if (rxrpc_may_reuse_conn(conn) &&
+                                   rxrpc_get_connection_maybe(conn))
+                                       goto found_extant_conn;
+                               /* The connection needs replacing.  It's better
+                                * to effect that when we have something to
+                                * replace it with so that we don't have to
+                                * rebalance the tree twice.
+                                */
+                               break;
+                       }
                }
                spin_unlock(&local->client_conns_lock);
        }
 
-       /* We didn't find a connection or we want an exclusive one. */
-       _debug("get new conn");
+       /* There wasn't a connection yet or we need an exclusive connection.
+        * We need to create a candidate and then potentially redo the search
+        * in case we're racing with another thread also trying to connect on a
+        * shareable connection.
+        */
+       _debug("new conn");
        candidate = rxrpc_alloc_client_connection(cp, gfp);
-       if (!candidate) {
-               _leave(" = -ENOMEM");
-               return -ENOMEM;
+       if (IS_ERR(candidate)) {
+               ret = PTR_ERR(candidate);
+               goto error_peer;
        }
 
+       /* Add the call to the new connection's waiting list in case we're
+        * going to have to wait for the connection to come live.  It's our
+        * connection, so we want first dibs on the channel slots.  We would
+        * normally have to take channel_lock but we do this before anyone else
+        * can see the connection.
+        */
+       list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+
        if (cp->exclusive) {
-               /* Assign the call on an exclusive connection to channel 0 and
-                * don't add the connection to the endpoint's shareable conn
-                * lookup tree.
-                */
-               _debug("exclusive chan 0");
-               conn = candidate;
-               atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-               spin_lock(&conn->channel_lock);
-               chan = 0;
-               goto found_channel;
+               call->conn = candidate;
+               call->security_ix = candidate->security_ix;
+               _leave(" = 0 [exclusive %d]", candidate->debug_id);
+               return 0;
        }
 
-       /* We need to redo the search before attempting to add a new connection
-        * lest we race with someone else adding a conflicting instance.
+       /* Publish the new connection for userspace to find.  We need to redo
+        * the search before doing this lest we race with someone else adding a
+        * conflicting instance.
         */
        _debug("search 2");
        spin_lock(&local->client_conns_lock);
@@ -249,124 +366,679 @@ int rxrpc_connect_call(struct rxrpc_call *call,
                parent = *pp;
                conn = rb_entry(parent, struct rxrpc_connection, client_node);
 
+#define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
                diff = (cmp(peer) ?:
                        cmp(key) ?:
                        cmp(security_level));
-               if (diff < 0)
+#undef cmp
+               if (diff < 0) {
                        pp = &(*pp)->rb_left;
-               else if (diff > 0)
+               } else if (diff > 0) {
                        pp = &(*pp)->rb_right;
-               else
-                       goto found_extant_conn;
+               } else {
+                       if (rxrpc_may_reuse_conn(conn) &&
+                           rxrpc_get_connection_maybe(conn))
+                               goto found_extant_conn;
+                       /* The old connection is from an outdated epoch. */
+                       _debug("replace conn");
+                       clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+                       rb_replace_node(&conn->client_node,
+                                       &candidate->client_node,
+                                       &local->client_conns);
+                       goto candidate_published;
+               }
        }
 
-       /* The second search also failed; simply add the new connection with
-        * the new call in channel 0.  Note that we need to take the channel
-        * lock before dropping the client conn lock.
-        */
        _debug("new conn");
-       set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
        rb_link_node(&candidate->client_node, parent, pp);
        rb_insert_color(&candidate->client_node, &local->client_conns);
-attached:
-       conn = candidate;
-       candidate = NULL;
 
-       atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-       spin_lock(&conn->channel_lock);
+candidate_published:
+       set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+       call->conn = candidate;
+       call->security_ix = candidate->security_ix;
        spin_unlock(&local->client_conns_lock);
-       chan = 0;
+       _leave(" = 0 [new %d]", candidate->debug_id);
+       return 0;
 
-found_channel:
-       _debug("found chan");
-       call->conn      = conn;
-       call->channel   = chan;
-       call->epoch     = conn->proto.epoch;
-       call->cid       = conn->proto.cid | chan;
-       call->call_id   = ++conn->channels[chan].call_counter;
-       conn->channels[chan].call_id = call->call_id;
-       rcu_assign_pointer(conn->channels[chan].call, call);
+       /* We come here if we found a suitable connection already in existence.
+        * Discard any candidate we may have allocated, and try to get a
+        * channel on this one.
+        */
+found_extant_conn:
+       _debug("found conn");
+       spin_unlock(&local->client_conns_lock);
 
-       _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+       rxrpc_put_connection(candidate);
+       candidate = NULL;
 
+       spin_lock(&conn->channel_lock);
+       call->conn = conn;
+       call->security_ix = conn->security_ix;
+       list_add(&call->chan_wait_link, &conn->waiting_calls);
        spin_unlock(&conn->channel_lock);
+       _leave(" = 0 [extant %d]", conn->debug_id);
+       return 0;
+
+error_peer:
        rxrpc_put_peer(cp->peer);
        cp->peer = NULL;
-       _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
-       return 0;
+error:
+       _leave(" = %d", ret);
+       return ret;
+}
 
-       /* We found a potentially suitable connection already in existence.  If
-        * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
-        * reaper), discard any candidate we may have allocated, and try to get
-        * a channel on this one, otherwise we have to replace it.
-        */
-found_extant_conn:
-       _debug("found conn");
-       if (!rxrpc_get_connection_maybe(conn)) {
-               set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
-               rb_replace_node(&conn->client_node,
-                               &candidate->client_node,
-                               &local->client_conns);
-               clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
-               goto attached;
+/*
+ * Activate a connection.
+ */
+static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+{
+       conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+       rxrpc_nr_active_client_conns++;
+       list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+}
+
+/*
+ * Attempt to animate a connection for a new call.
+ *
+ * If it's not exclusive, the connection is in the endpoint tree, and we're in
+ * the conn's list of those waiting to grab a channel.  There is, however, a
+ * limit on the number of live connections allowed at any one time, so we may
+ * have to wait for capacity to become available.
+ *
+ * Note that a connection on the waiting queue might *also* have active
+ * channels if it has been culled to make space and then re-requested by a new
+ * call.
+ */
+static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+{
+       unsigned int nr_conns;
+
+       _enter("%d,%d", conn->debug_id, conn->cache_state);
+
+       if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+               goto out;
+
+       spin_lock(&rxrpc_client_conn_cache_lock);
+
+       nr_conns = rxrpc_nr_client_conns;
+       if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags))
+               rxrpc_nr_client_conns = nr_conns + 1;
+
+       switch (conn->cache_state) {
+       case RXRPC_CONN_CLIENT_ACTIVE:
+       case RXRPC_CONN_CLIENT_WAITING:
+               break;
+
+       case RXRPC_CONN_CLIENT_INACTIVE:
+       case RXRPC_CONN_CLIENT_CULLED:
+       case RXRPC_CONN_CLIENT_IDLE:
+               if (nr_conns >= rxrpc_max_client_connections)
+                       goto wait_for_capacity;
+               goto activate_conn;
+
+       default:
+               BUG();
        }
 
-       spin_unlock(&local->client_conns_lock);
+out_unlock:
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+out:
+       _leave(" [%d]", conn->cache_state);
+       return;
 
-       rxrpc_put_connection(candidate);
+activate_conn:
+       _debug("activate");
+       rxrpc_activate_conn(conn);
+       goto out_unlock;
+
+wait_for_capacity:
+       _debug("wait");
+       conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+       list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+       goto out_unlock;
+}
+
+/*
+ * Deactivate a channel.
+ */
+static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn,
+                                        unsigned int channel)
+{
+       struct rxrpc_channel *chan = &conn->channels[channel];
+
+       rcu_assign_pointer(chan->call, NULL);
+       conn->active_chans &= ~(1 << channel);
+}
+
+/*
+ * Assign a channel to the call at the front of the queue and wake the call up.
+ * We don't increment the callNumber counter until this number has been exposed
+ * to the world.
+ */
+static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
+                                      unsigned int channel)
+{
+       struct rxrpc_channel *chan = &conn->channels[channel];
+       struct rxrpc_call *call = list_entry(conn->waiting_calls.next,
+                                            struct rxrpc_call, chan_wait_link);
+       u32 call_id = chan->call_counter + 1;
+
+       write_lock_bh(&call->state_lock);
+       call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+       write_unlock_bh(&call->state_lock);
+
+       rxrpc_see_call(call);
+       list_del_init(&call->chan_wait_link);
+       conn->active_chans |= 1 << channel;
+       call->peer      = rxrpc_get_peer(conn->params.peer);
+       call->cid       = conn->proto.cid | channel;
+       call->call_id   = call_id;
+
+       _net("CONNECT call %08x:%08x as call %d on conn %d",
+            call->cid, call->call_id, call->debug_id, conn->debug_id);
+
+       /* Paired with the read barrier in rxrpc_wait_for_channel().  This
+        * orders cid and epoch in the connection wrt call_id without the
+        * need to take the channel_lock.
+        *
+        * We provisionally assign a callNumber at this point, but we don't
+        * confirm it until the call is about to be exposed.
+        *
+        * TODO: Pair with a barrier in the data_ready handler when that looks
+        * at the call ID through a connection channel.
+        */
+       smp_wmb();
+       chan->call_id   = call_id;
+       rcu_assign_pointer(chan->call, call);
+       wake_up(&call->waitq);
+}
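
The smp_wmb() in rxrpc_activate_one_channel() pairs with the smp_rmb() in rxrpc_wait_for_channel(): the channel fields must become visible before the non-zero call_id that the waiter is polling for, so a waiter that sees the call_id is guaranteed to see the fields written first. A userspace analogue of the pairing using C11 release/acquire semantics (a sketch, not the kernel's primitives):

	#include <stdatomic.h>

	struct chan_state {
		unsigned int cid;	/* plain fields, published... */
		unsigned int epoch;
		atomic_uint call_id;	/* ...before this flag field */
	};

	/* Producer: fill in the payload, then publish call_id with release
	 * ordering (cf. the smp_wmb() before the chan->call_id store). */
	void publish(struct chan_state *s, unsigned int cid,
		     unsigned int epoch, unsigned int call_id)
	{
		s->cid = cid;
		s->epoch = epoch;
		atomic_store_explicit(&s->call_id, call_id,
				      memory_order_release);
	}

	/* Consumer: once call_id reads non-zero with acquire ordering,
	 * cid and epoch are guaranteed valid (cf. the smp_rmb()). */
	int consume(struct chan_state *s, unsigned int *cid,
		    unsigned int *epoch)
	{
		if (atomic_load_explicit(&s->call_id,
					 memory_order_acquire) == 0)
			return 0;	/* not yet published */
		*cid = s->cid;
		*epoch = s->epoch;
		return 1;
	}
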
+
+/*
+ * Assign channels and callNumbers to waiting calls.
+ */
+static void rxrpc_activate_channels(struct rxrpc_connection *conn)
+{
+       unsigned char mask;
+
+       _enter("%d", conn->debug_id);
+
+       if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE ||
+           conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
+               return;
+
+       spin_lock(&conn->channel_lock);
+
+       while (!list_empty(&conn->waiting_calls) &&
+              (mask = ~conn->active_chans,
+               mask &= RXRPC_ACTIVE_CHANS_MASK,
+               mask != 0))
+               rxrpc_activate_one_channel(conn, __ffs(mask));
+
+       spin_unlock(&conn->channel_lock);
+       _leave("");
+}
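
active_chans is a small bitmask with one bit per channel, and __ffs() picks the lowest clear bit for the next waiting call; rxrpc has four channels per connection, so the mask fits comfortably in an int. A userspace sketch of the same scan:

	#include <stdio.h>

	#define NR_CHANNELS		4
	#define ACTIVE_CHANS_MASK	((1u << NR_CHANNELS) - 1)

	/* Pick the lowest-numbered free channel, or -1 if all are busy. */
	static int pick_channel(unsigned int active_chans)
	{
		unsigned int mask = ~active_chans & ACTIVE_CHANS_MASK;

		if (!mask)
			return -1;
		return __builtin_ctz(mask);	/* cf. the kernel's __ffs() */
	}

	int main(void)
	{
		printf("%d\n", pick_channel(0x0));	/* 0: all free */
		printf("%d\n", pick_channel(0x5));	/* 1: 0 and 2 busy */
		printf("%d\n", pick_channel(0xf));	/* -1: all busy */
		return 0;
	}
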
+
+/*
+ * Wait for a callNumber and a channel to be granted to a call.
+ */
+static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
+{
+       int ret = 0;
+
+       _enter("%d", call->debug_id);
+
+       if (!call->call_id) {
+               DECLARE_WAITQUEUE(myself, current);
 
-       if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
                if (!gfpflags_allow_blocking(gfp)) {
-                       rxrpc_put_connection(conn);
-                       _leave(" = -EAGAIN");
-                       return -EAGAIN;
+                       ret = -EAGAIN;
+                       goto out;
                }
 
-               add_wait_queue(&conn->channel_wq, &myself);
+               add_wait_queue_exclusive(&call->waitq, &myself);
                for (;;) {
                        set_current_state(TASK_INTERRUPTIBLE);
-                       if (atomic_add_unless(&conn->avail_chans, -1, 0))
+                       if (call->call_id)
                                break;
-                       if (signal_pending(current))
-                               goto interrupted;
+                       if (signal_pending(current)) {
+                               ret = -ERESTARTSYS;
+                               break;
+                       }
                        schedule();
                }
-               remove_wait_queue(&conn->channel_wq, &myself);
+               remove_wait_queue(&call->waitq, &myself);
                __set_current_state(TASK_RUNNING);
        }
 
-       /* The connection allegedly now has a free channel and we can now
-        * attach the call to it.
-        */
+       /* Paired with the write barrier in rxrpc_activate_one_channel(). */
+       smp_rmb();
+
+out:
+       _leave(" = %d", ret);
+       return ret;
+}
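
The loop above is the canonical prepare-to-wait idiom: mark the task interruptible, re-check the condition, and only then schedule, so a wake-up landing between the check and the sleep can never be lost. The nearest userspace equivalent is a condition variable with the predicate re-checked in a loop (a sketch with hypothetical names):

	#include <pthread.h>

	struct waiter {
		pthread_mutex_t lock;
		pthread_cond_t cond;
		unsigned int call_id;	/* 0 until a channel is granted */
	};

	/* Block until call_id becomes non-zero; the predicate is re-checked
	 * after every wake-up, so spurious wake-ups are harmless. */
	unsigned int wait_for_channel(struct waiter *w)
	{
		unsigned int id;

		pthread_mutex_lock(&w->lock);
		while (w->call_id == 0)
			pthread_cond_wait(&w->cond, &w->lock);
		id = w->call_id;
		pthread_mutex_unlock(&w->lock);
		return id;
	}

	/* Grant a channel and wake the waiter (cf. wake_up(&call->waitq)). */
	void grant_channel(struct waiter *w, unsigned int call_id)
	{
		pthread_mutex_lock(&w->lock);
		w->call_id = call_id;
		pthread_cond_signal(&w->cond);
		pthread_mutex_unlock(&w->lock);
	}
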
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+                      struct rxrpc_conn_parameters *cp,
+                      struct sockaddr_rxrpc *srx,
+                      gfp_t gfp)
+{
+       int ret;
+
+       _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+       rxrpc_discard_expired_client_conns(NULL);
+       rxrpc_cull_active_client_conns();
+
+       ret = rxrpc_get_client_conn(call, cp, srx, gfp);
+       if (ret < 0)
+               return ret;
+
+       rxrpc_animate_client_conn(call->conn);
+       rxrpc_activate_channels(call->conn);
+
+       ret = rxrpc_wait_for_channel(call, gfp);
+       if (ret < 0)
+               rxrpc_disconnect_client_call(call);
+
+       _leave(" = %d", ret);
+       return ret;
+}
+
+/*
+ * Note that a connection is about to be exposed to the world.  Once it is
+ * exposed, we maintain an extra ref on it that stops it from being summarily
+ * discarded before it's (a) had a chance to deal with retransmission and (b)
+ * had a chance at re-use (the per-connection security negotiation is
+ * expensive).
+ */
+static void rxrpc_expose_client_conn(struct rxrpc_connection *conn)
+{
+       if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+               rxrpc_get_connection(conn);
+}
+
+/*
+ * Note that a call, and thus a connection, is about to be exposed to the
+ * world.
+ */
+void rxrpc_expose_client_call(struct rxrpc_call *call)
+{
+       struct rxrpc_connection *conn = call->conn;
+       struct rxrpc_channel *chan =
+               &conn->channels[call->cid & RXRPC_CHANNELMASK];
+
+       if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+               /* Mark the call ID as being used.  If the callNumber counter
+                * exceeds ~2 billion, we kill the connection after its
+                * outstanding calls have finished so that the counter doesn't
+                * wrap.
+                */
+               chan->call_counter++;
+               if (chan->call_counter >= INT_MAX)
+                       set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+               rxrpc_expose_client_conn(conn);
+       }
+}
+
+/*
+ * Disconnect a client call.
+ */
+void rxrpc_disconnect_client_call(struct rxrpc_call *call)
+{
+       unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+       struct rxrpc_connection *conn = call->conn;
+       struct rxrpc_channel *chan = &conn->channels[channel];
+
+       call->conn = NULL;
+
        spin_lock(&conn->channel_lock);
 
-       for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
-               if (!conn->channels[chan].call)
-                       goto found_channel;
-       BUG();
+       /* Calls that have never actually been assigned a channel can simply be
+        * discarded.  If the conn didn't get used either, it will follow
+        * immediately unless someone else grabs it in the meantime.
+        */
+       if (!list_empty(&call->chan_wait_link)) {
+               _debug("call is waiting");
+               ASSERTCMP(call->call_id, ==, 0);
+               ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+               list_del_init(&call->chan_wait_link);
+
+               /* We must deactivate or idle the connection if it's now
+                * waiting for nothing.
+                */
+               spin_lock(&rxrpc_client_conn_cache_lock);
+               if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
+                   list_empty(&conn->waiting_calls) &&
+                   !conn->active_chans)
+                       goto idle_connection;
+               goto out;
+       }
+
+       ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+       ASSERTCMP(atomic_read(&conn->usage), >=, 2);
+
+       /* If a client call was exposed to the world, we save the result for
+        * retransmission.
+        *
+        * We use a barrier here so that the call number and abort code can be
+        * read without needing to take a lock.
+        *
+        * TODO: Make the incoming packet handler check this and handle
+        * terminal retransmission without requiring access to the call.
+        */
+       if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+               _debug("exposed %u,%u", call->call_id, call->abort_code);
+               __rxrpc_disconnect_call(conn, call);
+       }
+
+       /* See if we can pass the channel directly to another call. */
+       if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE &&
+           !list_empty(&conn->waiting_calls)) {
+               _debug("pass chan");
+               rxrpc_activate_one_channel(conn, channel);
+               goto out_2;
+       }
+
+       /* Things are more complex and we need the cache lock.  We might be
+        * able to simply idle the conn or it might now be lurking on the wait
+        * list.  It might even get moved back to the active list whilst we're
+        * waiting for the lock.
+        */
+       spin_lock(&rxrpc_client_conn_cache_lock);
+
+       switch (conn->cache_state) {
+       case RXRPC_CONN_CLIENT_ACTIVE:
+               if (list_empty(&conn->waiting_calls)) {
+                       rxrpc_deactivate_one_channel(conn, channel);
+                       if (!conn->active_chans) {
+                               rxrpc_nr_active_client_conns--;
+                               goto idle_connection;
+                       }
+                       goto out;
+               }
+
+               _debug("pass chan 2");
+               rxrpc_activate_one_channel(conn, channel);
+               goto out;
+
+       case RXRPC_CONN_CLIENT_CULLED:
+               rxrpc_deactivate_one_channel(conn, channel);
+               ASSERT(list_empty(&conn->waiting_calls));
+               if (!conn->active_chans)
+                       goto idle_connection;
+               goto out;
+
+       case RXRPC_CONN_CLIENT_WAITING:
+               rxrpc_deactivate_one_channel(conn, channel);
+               goto out;
+
+       default:
+               BUG();
+       }
 
-interrupted:
-       remove_wait_queue(&conn->channel_wq, &myself);
-       __set_current_state(TASK_RUNNING);
+out:
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+out_2:
+       spin_unlock(&conn->channel_lock);
        rxrpc_put_connection(conn);
-       rxrpc_put_peer(cp->peer);
-       cp->peer = NULL;
-       _leave(" = -ERESTARTSYS");
-       return -ERESTARTSYS;
+       _leave("");
+       return;
+
+idle_connection:
+       /* As no channels remain active, the connection gets deactivated
+        * immediately or moved to the idle list for a short while.
+        */
+       if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+               _debug("make idle");
+               conn->idle_timestamp = jiffies;
+               conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
+               list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
+               if (rxrpc_idle_client_conns.next == &conn->cache_link &&
+                   !rxrpc_kill_all_client_conns)
+                       queue_delayed_work(rxrpc_workqueue,
+                                          &rxrpc_client_conn_reap,
+                                          rxrpc_conn_idle_client_expiry);
+       } else {
+               _debug("make inactive");
+               conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+               list_del_init(&conn->cache_link);
+       }
+       goto out;
 }
 
 /*
- * Remove a client connection from the local endpoint's tree, thereby removing
- * it as a target for reuse for new client calls.
+ * Clean up a dead client connection.
  */
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+static struct rxrpc_connection *
+rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
 {
+       struct rxrpc_connection *next;
        struct rxrpc_local *local = conn->params.local;
+       unsigned int nr_conns;
 
-       spin_lock(&local->client_conns_lock);
-       if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
-               rb_erase(&conn->client_node, &local->client_conns);
-       spin_unlock(&local->client_conns_lock);
+       if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) {
+               spin_lock(&local->client_conns_lock);
+               if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS,
+                                      &conn->flags))
+                       rb_erase(&conn->client_node, &local->client_conns);
+               spin_unlock(&local->client_conns_lock);
+       }
 
        rxrpc_put_client_connection_id(conn);
+
+       ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE);
+
+       if (!test_bit(RXRPC_CONN_COUNTED, &conn->flags))
+               return NULL;
+
+       spin_lock(&rxrpc_client_conn_cache_lock);
+       nr_conns = --rxrpc_nr_client_conns;
+
+       next = NULL;
+       if (nr_conns < rxrpc_max_client_connections &&
+           !list_empty(&rxrpc_waiting_client_conns)) {
+               next = list_entry(rxrpc_waiting_client_conns.next,
+                                 struct rxrpc_connection, cache_link);
+               rxrpc_get_connection(next);
+               rxrpc_activate_conn(next);
+       }
+
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+       rxrpc_kill_connection(conn);
+
+       if (next)
+               rxrpc_activate_channels(next);
+
+       /* We need to get rid of the temporary ref we took on next, but we
+        * can't call rxrpc_put_connection() recursively.
+        */
+       return next;
+}
+
+/*
+ * Clean up dead client connections.
+ */
+void rxrpc_put_client_conn(struct rxrpc_connection *conn)
+{
+       struct rxrpc_connection *next;
+
+       do {
+               _enter("%p{u=%d,d=%d}",
+                      conn, atomic_read(&conn->usage), conn->debug_id);
+
+               next = rxrpc_put_one_client_conn(conn);
+
+               if (!next)
+                       break;
+               conn = next;
+       } while (atomic_dec_and_test(&conn->usage));
+
+       _leave("");
+}
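
Putting one connection may activate a waiting one, on which a further reference must then be dropped, so a naive recursive put could nest to arbitrary depth. The loop above flattens that into iteration; schematically, with a toy chain of refcounted nodes (hypothetical names):

	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		int usage;
		struct node *next;	/* ref we hold on a follow-on node */
	};

	/* Destroy one node and hand back the reference it held, rather
	 * than calling node_put() recursively from the destructor. */
	static struct node *destroy_one(struct node *n)
	{
		struct node *next = n->next;

		printf("destroying node %p\n", (void *)n);
		free(n);
		return next;
	}

	/* Iterative put: constant stack depth however long the chain is. */
	static void node_put(struct node *n)
	{
		while (n && --n->usage == 0)
			n = destroy_one(n);
	}

	int main(void)
	{
		struct node *a = calloc(1, sizeof(*a));
		struct node *b = calloc(1, sizeof(*b));

		a->usage = 1;
		b->usage = 1;
		a->next = b;	/* a holds the only ref on b */
		node_put(a);	/* frees a, then b, without recursion */
		return 0;
	}
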
+
+/*
+ * Kill the longest-active client connections to make room for new ones.
+ */
+static void rxrpc_cull_active_client_conns(void)
+{
+       struct rxrpc_connection *conn;
+       unsigned int nr_conns = rxrpc_nr_client_conns;
+       unsigned int nr_active, limit;
+
+       _enter("");
+
+       ASSERTCMP(nr_conns, >=, 0);
+       if (nr_conns < rxrpc_max_client_connections) {
+               _leave(" [ok]");
+               return;
+       }
+       limit = rxrpc_reap_client_connections;
+
+       spin_lock(&rxrpc_client_conn_cache_lock);
+       nr_active = rxrpc_nr_active_client_conns;
+
+       while (nr_active > limit) {
+               ASSERT(!list_empty(&rxrpc_active_client_conns));
+               conn = list_entry(rxrpc_active_client_conns.next,
+                                 struct rxrpc_connection, cache_link);
+               ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+
+               if (list_empty(&conn->waiting_calls)) {
+                       conn->cache_state = RXRPC_CONN_CLIENT_CULLED;
+                       list_del_init(&conn->cache_link);
+               } else {
+                       conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+                       list_move_tail(&conn->cache_link,
+                                      &rxrpc_waiting_client_conns);
+               }
+
+               nr_active--;
+       }
+
+       rxrpc_nr_active_client_conns = nr_active;
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+       ASSERTCMP(nr_active, >=, 0);
+       _leave(" [culled]");
+}
+
+/*
+ * Discard expired client connections from the idle list.  Each conn in the
+ * idle list has been exposed and holds an extra ref because of that.
+ *
+ * This may be called from conn setup or from a work item, so it may race
+ * with itself; the trylock on the discard lock below ensures that only one
+ * instance runs at a time.
+ */
+static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+{
+       struct rxrpc_connection *conn;
+       unsigned long expiry, conn_expires_at, now;
+       unsigned int nr_conns;
+       bool did_discard = false;
+
+       _enter("%c", work ? 'w' : 'n');
+
+       if (list_empty(&rxrpc_idle_client_conns)) {
+               _leave(" [empty]");
+               return;
+       }
+
+       /* Don't double up on the discarding */
+       if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+               _leave(" [already]");
+               return;
+       }
+
+       /* We keep an estimate of what the number of conns ought to be after
+        * we've discarded some so that we don't overdo the discarding.
+        */
+       nr_conns = rxrpc_nr_client_conns;
+
+next:
+       spin_lock(&rxrpc_client_conn_cache_lock);
+
+       if (list_empty(&rxrpc_idle_client_conns))
+               goto out;
+
+       conn = list_entry(rxrpc_idle_client_conns.next,
+                         struct rxrpc_connection, cache_link);
+       ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
+
+       if (!rxrpc_kill_all_client_conns) {
+               /* If the number of connections is over the reap limit, we
+                * expedite discard by reducing the expiry timeout.  We must,
+                * however, have at least a short grace period to be able to do
+                * final-ACK or ABORT retransmission.
+                */
+               expiry = rxrpc_conn_idle_client_expiry;
+               if (nr_conns > rxrpc_reap_client_connections)
+                       expiry = rxrpc_conn_idle_client_fast_expiry;
+
+               conn_expires_at = conn->idle_timestamp + expiry;
+
+               now = READ_ONCE(jiffies);
+               if (time_after(conn_expires_at, now))
+                       goto not_yet_expired;
+       }
+
+       _debug("discard conn %d", conn->debug_id);
+       if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+               BUG();
+       conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+       list_del_init(&conn->cache_link);
+
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+
+       /* When we cleared the EXPOSED flag, we took on responsibility for the
+        * reference that the flag held on the usage count.  We deal with that
+        * here.  If someone re-sets the flag and re-gets the ref, that's fine.
+        */
+       rxrpc_put_connection(conn);
+       did_discard = true;
+       nr_conns--;
+       goto next;
+
+not_yet_expired:
+       /* The connection at the front of the queue hasn't yet expired, so
+        * schedule the work item for that point if we discarded something.
+        *
+        * We don't worry if the work item is already scheduled - it can look
+        * after rescheduling itself at a later time.  We could cancel it, but
+        * then things get messier.
+        */
+       _debug("not yet");
+       if (!rxrpc_kill_all_client_conns)
+               queue_delayed_work(rxrpc_workqueue,
+                                  &rxrpc_client_conn_reap,
+                                  conn_expires_at - now);
+
+out:
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+       spin_unlock(&rxrpc_client_conn_discard_mutex);
+       _leave("");
+}
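
The expiry check compares jiffies values with time_after(), which works on the signed difference so the comparison stays correct across counter wrap-around. A minimal sketch of the idiom:

	#include <stdio.h>

	/* Wrap-safe "a is after b", as the kernel's time_after() does it:
	 * subtract and look at the sign, so values straddling the
	 * wrap-around point still compare correctly. */
	static int time_after_u32(unsigned int a, unsigned int b)
	{
		return (int)(b - a) < 0;
	}

	int main(void)
	{
		unsigned int now = 0xfffffff0u;		/* just before wrap */
		unsigned int expiry = now + 0x20;	/* wraps past zero */

		printf("%d\n", time_after_u32(expiry, now)); /* 1: later */
		printf("%d\n", expiry > now);	/* 0: naive compare lies */
		return 0;
	}
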
+
+/*
+ * Preemptively destroy all the client connection records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_client_connections(void)
+{
+       _enter("");
+
+       spin_lock(&rxrpc_client_conn_cache_lock);
+       rxrpc_kill_all_client_conns = true;
+       spin_unlock(&rxrpc_client_conn_cache_lock);
+
+       cancel_delayed_work(&rxrpc_client_conn_reap);
+
+       if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+               _debug("destroy: queue failed");
+
+       _leave("");
 }
index cee0f35..0691007 100644
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/errqueue.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include "ar-internal.h"
 
+/*
+ * Retransmit terminal ACK or ABORT of the previous call.
+ */
+static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+                                      struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       struct rxrpc_channel *chan;
+       struct msghdr msg;
+       struct kvec iov;
+       struct {
+               struct rxrpc_wire_header whdr;
+               union {
+                       struct {
+                               __be32 code;
+                       } abort;
+                       struct {
+                               struct rxrpc_ackpacket ack;
+                               u8 padding[3];
+                               struct rxrpc_ackinfo info;
+                       };
+               };
+       } __attribute__((packed)) pkt;
+       size_t len;
+       u32 serial, mtu, call_id;
+
+       _enter("%d", conn->debug_id);
+
+       chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+
+       /* If the last call got moved on whilst we were waiting to run, just
+        * ignore this packet.
+        */
+       call_id = READ_ONCE(chan->last_call);
+       /* Sync with __rxrpc_disconnect_call() */
+       smp_rmb();
+       if (call_id != sp->hdr.callNumber)
+               return;
+
+       msg.msg_name    = &conn->params.peer->srx.transport;
+       msg.msg_namelen = conn->params.peer->srx.transport_len;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_flags   = 0;
+
+       pkt.whdr.epoch          = htonl(sp->hdr.epoch);
+       pkt.whdr.cid            = htonl(sp->hdr.cid);
+       pkt.whdr.callNumber     = htonl(sp->hdr.callNumber);
+       pkt.whdr.seq            = 0;
+       pkt.whdr.type           = chan->last_type;
+       pkt.whdr.flags          = conn->out_clientflag;
+       pkt.whdr.userStatus     = 0;
+       pkt.whdr.securityIndex  = conn->security_ix;
+       pkt.whdr._rsvd          = 0;
+       pkt.whdr.serviceId      = htons(chan->last_service_id);
+
+       len = sizeof(pkt.whdr);
+       switch (chan->last_type) {
+       case RXRPC_PACKET_TYPE_ABORT:
+               pkt.abort.code  = htonl(chan->last_abort);
+               len += sizeof(pkt.abort);
+               break;
+
+       case RXRPC_PACKET_TYPE_ACK:
+               mtu = conn->params.peer->if_mtu;
+               mtu -= conn->params.peer->hdrsize;
+               pkt.ack.bufferSpace     = 0;
+               pkt.ack.maxSkew         = htons(skb->priority);
+               pkt.ack.firstPacket     = htonl(chan->last_seq);
+               pkt.ack.previousPacket  = htonl(chan->last_seq - 1);
+               pkt.ack.serial          = htonl(sp->hdr.serial);
+               pkt.ack.reason          = RXRPC_ACK_DUPLICATE;
+               pkt.ack.nAcks           = 0;
+               pkt.info.rxMTU          = htonl(rxrpc_rx_mtu);
+               pkt.info.maxMTU         = htonl(mtu);
+               pkt.info.rwind          = htonl(rxrpc_rx_window_size);
+               pkt.info.jumbo_max      = htonl(rxrpc_rx_jumbo_max);
+               len += sizeof(pkt.ack) + sizeof(pkt.info);
+               break;
+       }
+
+       /* Resync with __rxrpc_disconnect_call() and check that the last call
+        * didn't get advanced whilst we were filling out the packets.
+        */
+       smp_rmb();
+       if (READ_ONCE(chan->last_call) != call_id)
+               return;
+
+       iov.iov_base    = &pkt;
+       iov.iov_len     = len;
+
+       serial = atomic_inc_return(&conn->serial);
+       pkt.whdr.serial = htonl(serial);
+
+       switch (chan->last_type) {
+       case RXRPC_PACKET_TYPE_ABORT:
+               _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
+               break;
+       case RXRPC_PACKET_TYPE_ACK:
+               _proto("Tx ACK %%%u [re]", serial);
+               break;
+       }
+
+       kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+       _leave("");
+       return;
+}
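
rxrpc_conn_retransmit_call() above assembles the whole retransmission in a packed on-stack struct and emits it with one kernel_sendmsg() call. A minimal sketch of that msghdr/kvec usage, with a hypothetical send_blob() helper over a kernel UDP socket:

	#include <linux/net.h>
	#include <linux/uio.h>

	static int send_blob(struct socket *sock, struct sockaddr *addr,
			     int addrlen, void *buf, size_t len)
	{
		struct msghdr msg = {
			.msg_name	= addr,
			.msg_namelen	= addrlen,
		};
		struct kvec iov = {
			.iov_base	= buf,
			.iov_len	= len,
		};

		/* kernel_sendmsg() copies out of the kvec, so buf may live on
		 * the stack, just as pkt does above.
		 */
		return kernel_sendmsg(sock, &msg, &iov, 1, len);
	}
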
+
 /*
  * pass a connection-level abort onto all calls on that connection
  */
-static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
-                             u32 abort_code)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn,
+                             enum rxrpc_call_completion compl,
+                             u32 abort_code, int error)
 {
        struct rxrpc_call *call;
        int i;
@@ -41,19 +146,15 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
                call = rcu_dereference_protected(
                        conn->channels[i].call,
                        lockdep_is_held(&conn->channel_lock));
-               write_lock_bh(&call->state_lock);
-               if (call->state <= RXRPC_CALL_COMPLETE) {
-                       call->state = state;
-                       if (state == RXRPC_CALL_LOCALLY_ABORTED) {
-                               call->local_abort = conn->local_abort;
-                               set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-                       } else {
-                               call->remote_abort = conn->remote_abort;
-                               set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-                       }
-                       rxrpc_queue_call(call);
+               if (call) {
+                       if (compl == RXRPC_CALL_LOCALLY_ABORTED)
+                               trace_rxrpc_abort("CON", call->cid,
+                                                 call->call_id, 0,
+                                                 abort_code, error);
+                       if (rxrpc_set_call_completion(call, compl,
+                                                     abort_code, error))
+                               rxrpc_notify_socket(call);
                }
-               write_unlock_bh(&call->state_lock);
        }
 
        spin_unlock(&conn->channel_lock);
@@ -78,17 +179,16 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 
        /* generate a connection-level abort */
        spin_lock_bh(&conn->state_lock);
-       if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
-               conn->state = RXRPC_CONN_LOCALLY_ABORTED;
-               conn->error = error;
-               spin_unlock_bh(&conn->state_lock);
-       } else {
+       if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
                spin_unlock_bh(&conn->state_lock);
                _leave(" = 0 [already dead]");
                return 0;
        }
 
-       rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+       conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+       spin_unlock_bh(&conn->state_lock);
+
+       rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error);
 
        msg.msg_name    = &conn->params.peer->srx.transport;
        msg.msg_namelen = conn->params.peer->srx.transport_len;
@@ -132,17 +232,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 
 /*
  * mark a call as being on a now-secured channel
- * - must be called with softirqs disabled
+ * - must be called with BHs disabled.
  */
 static void rxrpc_call_is_secure(struct rxrpc_call *call)
 {
        _enter("%p", call);
        if (call) {
-               read_lock(&call->state_lock);
-               if (call->state < RXRPC_CALL_COMPLETE &&
-                   !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
-                       rxrpc_queue_call(call);
-               read_unlock(&call->state_lock);
+               write_lock_bh(&call->state_lock);
+               if (call->state == RXRPC_CALL_SERVER_SECURING) {
+                       call->state = RXRPC_CALL_SERVER_ACCEPTING;
+                       rxrpc_notify_socket(call);
+               }
+               write_unlock_bh(&call->state_lock);
        }
 }
 
@@ -159,22 +260,27 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
        int loop, ret;
 
        if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
-               kleave(" = -ECONNABORTED [%u]", conn->state);
+               _leave(" = -ECONNABORTED [%u]", conn->state);
                return -ECONNABORTED;
        }
 
        _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
 
        switch (sp->hdr.type) {
+       case RXRPC_PACKET_TYPE_DATA:
+       case RXRPC_PACKET_TYPE_ACK:
+               rxrpc_conn_retransmit_call(conn, skb);
+               return 0;
+
        case RXRPC_PACKET_TYPE_ABORT:
-               if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
+               if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0)
                        return -EPROTO;
                abort_code = ntohl(wtmp);
                _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
 
                conn->state = RXRPC_CONN_REMOTELY_ABORTED;
                rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
-                                 abort_code);
+                                 abort_code, ECONNABORTED);
                return -ECONNABORTED;
 
        case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -199,14 +305,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 
                if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
                        conn->state = RXRPC_CONN_SERVICE;
+                       spin_unlock(&conn->state_lock);
                        for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
                                rxrpc_call_is_secure(
                                        rcu_dereference_protected(
                                                conn->channels[loop].call,
                                                lockdep_is_held(&conn->channel_lock)));
+               } else {
+                       spin_unlock(&conn->state_lock);
                }
 
-               spin_unlock(&conn->state_lock);
                spin_unlock(&conn->channel_lock);
                return 0;
 
@@ -277,6 +385,7 @@ void rxrpc_process_connection(struct work_struct *work)
        /* go through the conn-level event packets, releasing the ref on this
         * connection that each one has when we've finished with it */
        while ((skb = skb_dequeue(&conn->rx_queue))) {
+               rxrpc_see_skb(skb);
                ret = rxrpc_process_event(conn, skb, &abort_code);
                switch (ret) {
                case -EPROTO:
@@ -308,87 +417,3 @@ protocol_error:
        _leave(" [EPROTO]");
        goto out;
 }
-
-/*
- * put a packet up for transport-level abort
- */
-void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
-       CHECK_SLAB_OKAY(&local->usage);
-
-       skb_queue_tail(&local->reject_queue, skb);
-       rxrpc_queue_local(local);
-}
-
-/*
- * reject packets through the local endpoint
- */
-void rxrpc_reject_packets(struct rxrpc_local *local)
-{
-       union {
-               struct sockaddr sa;
-               struct sockaddr_in sin;
-       } sa;
-       struct rxrpc_skb_priv *sp;
-       struct rxrpc_wire_header whdr;
-       struct sk_buff *skb;
-       struct msghdr msg;
-       struct kvec iov[2];
-       size_t size;
-       __be32 code;
-
-       _enter("%d", local->debug_id);
-
-       iov[0].iov_base = &whdr;
-       iov[0].iov_len = sizeof(whdr);
-       iov[1].iov_base = &code;
-       iov[1].iov_len = sizeof(code);
-       size = sizeof(whdr) + sizeof(code);
-
-       msg.msg_name = &sa;
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_flags = 0;
-
-       memset(&sa, 0, sizeof(sa));
-       sa.sa.sa_family = local->srx.transport.family;
-       switch (sa.sa.sa_family) {
-       case AF_INET:
-               msg.msg_namelen = sizeof(sa.sin);
-               break;
-       default:
-               msg.msg_namelen = 0;
-               break;
-       }
-
-       memset(&whdr, 0, sizeof(whdr));
-       whdr.type = RXRPC_PACKET_TYPE_ABORT;
-
-       while ((skb = skb_dequeue(&local->reject_queue))) {
-               sp = rxrpc_skb(skb);
-               switch (sa.sa.sa_family) {
-               case AF_INET:
-                       sa.sin.sin_port = udp_hdr(skb)->source;
-                       sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-                       code = htonl(skb->priority);
-
-                       whdr.epoch      = htonl(sp->hdr.epoch);
-                       whdr.cid        = htonl(sp->hdr.cid);
-                       whdr.callNumber = htonl(sp->hdr.callNumber);
-                       whdr.serviceId  = htons(sp->hdr.serviceId);
-                       whdr.flags      = sp->hdr.flags;
-                       whdr.flags      ^= RXRPC_CLIENT_INITIATED;
-                       whdr.flags      &= RXRPC_CLIENT_INITIATED;
-
-                       kernel_sendmsg(local->socket, &msg, iov, 2, size);
-                       break;
-
-               default:
-                       break;
-               }
-
-               rxrpc_free_skb(skb);
-       }
-
-       _leave("");
-}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 896d844..bb1f292 100644
@@ -1,6 +1,6 @@
-/* RxRPC virtual connection handler
+/* RxRPC virtual connection handler, common bits.
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -15,8 +15,6 @@
 #include <linux/slab.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
 /*
@@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60;
 static void rxrpc_connection_reaper(struct work_struct *work);
 
 LIST_HEAD(rxrpc_connections);
+LIST_HEAD(rxrpc_connection_proc_list);
 DEFINE_RWLOCK(rxrpc_connection_lock);
 static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
 
+static void rxrpc_destroy_connection(struct rcu_head *);
+
 /*
  * allocate a new connection
  */
@@ -41,21 +42,19 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
 
        conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
        if (conn) {
+               INIT_LIST_HEAD(&conn->cache_link);
                spin_lock_init(&conn->channel_lock);
-               init_waitqueue_head(&conn->channel_wq);
+               INIT_LIST_HEAD(&conn->waiting_calls);
                INIT_WORK(&conn->processor, &rxrpc_process_connection);
+               INIT_LIST_HEAD(&conn->proc_link);
                INIT_LIST_HEAD(&conn->link);
                skb_queue_head_init(&conn->rx_queue);
                conn->security = &rxrpc_no_security;
                spin_lock_init(&conn->state_lock);
-               /* We maintain an extra ref on the connection whilst it is
-                * on the rxrpc_connections list.
-                */
-               atomic_set(&conn->usage, 2);
                conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
-               atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
                conn->size_align = 4;
                conn->header_size = sizeof(struct rxrpc_wire_header);
+               conn->idle_timestamp = jiffies;
        }
 
        _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -135,6 +134,16 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
                            srx.transport.sin.sin_addr.s_addr)
                                goto not_found;
                        break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+               case AF_INET6:
+                       if (peer->srx.transport.sin6.sin6_port !=
+                           srx.transport.sin6.sin6_port ||
+                           memcmp(&peer->srx.transport.sin6.sin6_addr,
+                                  &srx.transport.sin6.sin6_addr,
+                                  sizeof(struct in6_addr)) != 0)
+                               goto not_found;
+                       break;
+#endif
                default:
                        BUG();
                }
@@ -153,25 +162,32 @@ not_found:
  * terminates.  The caller must hold the channel_lock and must release the
  * call's ref on the connection.
  */
-void __rxrpc_disconnect_call(struct rxrpc_call *call)
+void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+                            struct rxrpc_call *call)
 {
-       struct rxrpc_connection *conn = call->conn;
-       struct rxrpc_channel *chan = &conn->channels[call->channel];
+       struct rxrpc_channel *chan =
+               &conn->channels[call->cid & RXRPC_CHANNELMASK];
 
-       _enter("%d,%d", conn->debug_id, call->channel);
+       _enter("%d,%x", conn->debug_id, call->cid);
 
        if (rcu_access_pointer(chan->call) == call) {
                /* Save the result of the call so that we can repeat it if necessary
                 * through the channel, whilst disposing of the actual call record.
                 */
-               chan->last_result = call->local_abort;
+               chan->last_service_id = call->service_id;
+               if (call->abort_code) {
+                       chan->last_abort = call->abort_code;
+                       chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+               } else {
+                       chan->last_seq = call->rx_hard_ack;
+                       chan->last_type = RXRPC_PACKET_TYPE_ACK;
+               }
+               /* Sync with rxrpc_conn_retransmit_call(). */
                smp_wmb();
                chan->last_call = chan->call_id;
                chan->call_id = chan->call_counter;
 
                rcu_assign_pointer(chan->call, NULL);
-               atomic_inc(&conn->avail_chans);
-               wake_up(&conn->channel_wq);
        }
 
        _leave("");
@@ -185,34 +201,56 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 {
        struct rxrpc_connection *conn = call->conn;
 
+       spin_lock_bh(&conn->params.peer->lock);
+       hlist_del_init(&call->error_link);
+       spin_unlock_bh(&conn->params.peer->lock);
+
+       if (rxrpc_is_client_call(call))
+               return rxrpc_disconnect_client_call(call);
+
        spin_lock(&conn->channel_lock);
-       __rxrpc_disconnect_call(call);
+       __rxrpc_disconnect_call(conn, call);
        spin_unlock(&conn->channel_lock);
 
        call->conn = NULL;
+       conn->idle_timestamp = jiffies;
        rxrpc_put_connection(conn);
 }
 
 /*
- * release a virtual connection
+ * Kill off a connection.
  */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
+void rxrpc_kill_connection(struct rxrpc_connection *conn)
 {
-       if (!conn)
-               return;
+       ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+              !rcu_access_pointer(conn->channels[1].call) &&
+              !rcu_access_pointer(conn->channels[2].call) &&
+              !rcu_access_pointer(conn->channels[3].call));
+       ASSERT(list_empty(&conn->cache_link));
 
-       _enter("%p{u=%d,d=%d}",
-              conn, atomic_read(&conn->usage), conn->debug_id);
+       write_lock(&rxrpc_connection_lock);
+       list_del_init(&conn->proc_link);
+       write_unlock(&rxrpc_connection_lock);
 
-       ASSERTCMP(atomic_read(&conn->usage), >, 1);
+       /* Drain the Rx queue.  Note that even though we've unpublished, an
+        * incoming packet could still be in the process of being added to
+        * our Rx queue, so we will need to drain it again in the RCU cleanup
+        * handler.
+        */
+       rxrpc_purge_queue(&conn->rx_queue);
 
-       conn->put_time = ktime_get_seconds();
-       if (atomic_dec_return(&conn->usage) == 1) {
-               _debug("zombie");
-               rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
-       }
+       /* Leave final destruction to RCU.  The connection processor work item
+        * must carry a ref on the connection to prevent us getting here whilst
+        * it is queued or running.
+        */
+       call_rcu(&conn->rcu, rxrpc_destroy_connection);
+}
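
rxrpc_kill_connection() unpublishes the connection and then leaves the final free to call_rcu(), so that a lookup already inside an RCU read-side critical section can finish using the object before it goes away. A minimal sketch of the idiom on a hypothetical my_obj:

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_obj {
		struct rcu_head	rcu;
		/* ... payload ... */
	};

	static void my_obj_free_rcu(struct rcu_head *rcu)
	{
		struct my_obj *obj = container_of(rcu, struct my_obj, rcu);

		kfree(obj);
	}

	static void my_obj_kill(struct my_obj *obj)
	{
		/* Remove obj from all lookup structures first; RCU then
		 * defers the free until current readers have finished.
		 */
		call_rcu(&obj->rcu, my_obj_free_rcu);
	}
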
 
-       _leave("");
+/*
+ * release a virtual connection
+ */
+void __rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+       rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 }
 
 /*
@@ -242,19 +280,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
 }
 
 /*
- * reap dead connections
+ * reap dead service connections
  */
 static void rxrpc_connection_reaper(struct work_struct *work)
 {
        struct rxrpc_connection *conn, *_p;
-       unsigned long reap_older_than, earliest, put_time, now;
+       unsigned long reap_older_than, earliest, idle_timestamp, now;
 
        LIST_HEAD(graveyard);
 
        _enter("");
 
-       now = ktime_get_seconds();
-       reap_older_than =  now - rxrpc_connection_expiry;
+       now = jiffies;
+       reap_older_than = now - rxrpc_connection_expiry * HZ;
        earliest = ULONG_MAX;
 
        write_lock(&rxrpc_connection_lock);
@@ -262,11 +300,17 @@ static void rxrpc_connection_reaper(struct work_struct *work)
                ASSERTCMP(atomic_read(&conn->usage), >, 0);
                if (likely(atomic_read(&conn->usage) > 1))
                        continue;
+               if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
+                       continue;
+
+               idle_timestamp = READ_ONCE(conn->idle_timestamp);
+               _debug("reap CONN %d { u=%d,t=%ld }",
+                      conn->debug_id, atomic_read(&conn->usage),
+                      (long)reap_older_than - (long)idle_timestamp);
 
-               put_time = READ_ONCE(conn->put_time);
-               if (time_after(put_time, reap_older_than)) {
-                       if (time_before(put_time, earliest))
-                               earliest = put_time;
+               if (time_after(idle_timestamp, reap_older_than)) {
+                       if (time_before(idle_timestamp, earliest))
+                               earliest = idle_timestamp;
                        continue;
                }
 
@@ -277,7 +321,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
                        continue;
 
                if (rxrpc_conn_is_client(conn))
-                       rxrpc_unpublish_client_conn(conn);
+                       BUG();
                else
                        rxrpc_unpublish_service_conn(conn);
 
@@ -287,9 +331,9 @@ static void rxrpc_connection_reaper(struct work_struct *work)
 
        if (earliest != ULONG_MAX) {
                _debug("reschedule reaper %ld", (long) earliest - now);
-               ASSERTCMP(earliest, >, now);
+               ASSERT(time_after(earliest, now));
                rxrpc_queue_delayed_work(&rxrpc_connection_reap,
-                                        (earliest - now) * HZ);
+                                        earliest - now);
        }
 
        while (!list_empty(&graveyard)) {
@@ -298,16 +342,15 @@ static void rxrpc_connection_reaper(struct work_struct *work)
                list_del_init(&conn->link);
 
                ASSERTCMP(atomic_read(&conn->usage), ==, 0);
-               skb_queue_purge(&conn->rx_queue);
-               call_rcu(&conn->rcu, rxrpc_destroy_connection);
+               rxrpc_kill_connection(conn);
        }
 
        _leave("");
 }
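
The reaper now keeps idle_timestamp in jiffies and compares with time_after()/time_before(), which stay correct across the jiffies wrap where a plain '<' on unsigned long would not. A small sketch (reap() and requeue() are hypothetical):

	#include <linux/jiffies.h>

	extern void reap(void);				/* hypothetical */
	extern void requeue(unsigned long delay);	/* hypothetical */

	static void check_expiry(unsigned long idle_timestamp,
				 unsigned int expiry_secs)
	{
		unsigned long deadline = idle_timestamp + expiry_secs * HZ;

		/* time_after() compares via a signed difference, so the
		 * result is correct even if jiffies has wrapped around.
		 */
		if (time_after(jiffies, deadline))
			reap();
		else
			requeue(deadline - jiffies);
	}
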
 
 /*
- * preemptively destroy all the connection records rather than waiting for them
- * to time out
+ * preemptively destroy all the service connection records rather than
+ * waiting for them to time out
  */
 void __exit rxrpc_destroy_all_connections(void)
 {
@@ -316,6 +359,8 @@ void __exit rxrpc_destroy_all_connections(void)
 
        _enter("");
 
+       rxrpc_destroy_all_client_connections();
+
        rxrpc_connection_expiry = 0;
        cancel_delayed_work(&rxrpc_connection_reap);
        rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -330,6 +375,8 @@ void __exit rxrpc_destroy_all_connections(void)
        write_unlock(&rxrpc_connection_lock);
        BUG_ON(leak);
 
+       ASSERT(list_empty(&rxrpc_connection_proc_list));
+
        /* Make sure the local and peer records pinned by any dying connections
         * are released.
         */
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index fd9027c..83d54da 100644
@@ -65,9 +65,8 @@ done:
  * Insert a service connection into a peer's tree, thereby making it a target
  * for incoming packets.
  */
-static struct rxrpc_connection *
-rxrpc_publish_service_conn(struct rxrpc_peer *peer,
-                          struct rxrpc_connection *conn)
+static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+                                      struct rxrpc_connection *conn)
 {
        struct rxrpc_connection *cursor = NULL;
        struct rxrpc_conn_proto k = conn->proto;
@@ -96,7 +95,7 @@ conn_published:
        set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
        write_sequnlock_bh(&peer->service_conn_lock);
        _leave(" = %d [new]", conn->debug_id);
-       return conn;
+       return;
 
 found_extant_conn:
        if (atomic_read(&cursor->usage) == 0)
@@ -119,100 +118,54 @@ replace_old_connection:
 }
 
 /*
- * get a record of an incoming connection
+ * Preallocate a service connection.  The connection is placed on the proc and
+ * reap lists so that we don't have to get the lock from BH context.
  */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
-                                                  struct sockaddr_rxrpc *srx,
-                                                  struct sk_buff *skb)
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
 {
-       struct rxrpc_connection *conn;
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct rxrpc_peer *peer;
-       const char *new = "old";
+       struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
 
-       _enter("");
+       if (conn) {
+               /* We maintain an extra ref on the connection whilst it is on
+                * the rxrpc_connections list.
+                */
+               conn->state = RXRPC_CONN_SERVICE_PREALLOC;
+               atomic_set(&conn->usage, 2);
 
-       peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
-       if (!peer) {
-               _debug("no peer");
-               return ERR_PTR(-EBUSY);
+               write_lock(&rxrpc_connection_lock);
+               list_add_tail(&conn->link, &rxrpc_connections);
+               list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
+               write_unlock(&rxrpc_connection_lock);
        }
 
-       ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
-       rcu_read_lock();
-       peer = rxrpc_lookup_peer_rcu(local, srx);
-       if (peer) {
-               conn = rxrpc_find_service_conn_rcu(peer, skb);
-               if (conn) {
-                       if (sp->hdr.securityIndex != conn->security_ix)
-                               goto security_mismatch_rcu;
-                       if (rxrpc_get_connection_maybe(conn))
-                               goto found_extant_connection_rcu;
-
-                       /* The conn has expired but we can't remove it without
-                        * the appropriate lock, so we attempt to replace it
-                        * when we have a new candidate.
-                        */
-               }
-
-               if (!rxrpc_get_peer_maybe(peer))
-                       peer = NULL;
-       }
-       rcu_read_unlock();
+       return conn;
+}
 
-       if (!peer) {
-               peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
-               if (!peer)
-                       goto enomem;
-       }
+/*
+ * Set up an incoming connection.  This is called in BH context with the RCU
+ * read lock held.
+ */
+void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+                                  struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-       /* We don't have a matching record yet. */
-       conn = rxrpc_alloc_connection(GFP_NOIO);
-       if (!conn)
-               goto enomem_peer;
+       _enter("");
 
        conn->proto.epoch       = sp->hdr.epoch;
        conn->proto.cid         = sp->hdr.cid & RXRPC_CIDMASK;
-       conn->params.local      = local;
-       conn->params.peer       = peer;
        conn->params.service_id = sp->hdr.serviceId;
        conn->security_ix       = sp->hdr.securityIndex;
        conn->out_clientflag    = 0;
-       conn->state             = RXRPC_CONN_SERVICE;
-       if (conn->params.service_id)
+       if (conn->security_ix)
                conn->state     = RXRPC_CONN_SERVICE_UNSECURED;
-
-       rxrpc_get_local(local);
-
-       write_lock(&rxrpc_connection_lock);
-       list_add_tail(&conn->link, &rxrpc_connections);
-       write_unlock(&rxrpc_connection_lock);
+       else
+               conn->state     = RXRPC_CONN_SERVICE;
 
        /* Make the connection a target for incoming packets. */
-       rxrpc_publish_service_conn(peer, conn);
-
-       new = "new";
-
-success:
-       _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-       _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
-       return conn;
-
-found_extant_connection_rcu:
-       rcu_read_unlock();
-       goto success;
-
-security_mismatch_rcu:
-       rcu_read_unlock();
-       _leave(" = -EKEYREJECTED");
-       return ERR_PTR(-EKEYREJECTED);
+       rxrpc_publish_service_conn(conn->params.peer, conn);
 
-enomem_peer:
-       rxrpc_put_peer(peer);
-enomem:
-       _leave(" = -ENOMEM");
-       return ERR_PTR(-ENOMEM);
+       _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid);
 }
 
 /*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 70bb778..75af0bd 100644
@@ -1,6 +1,6 @@
 /* RxRPC packet reception
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
 #include <net/net_namespace.h>
 #include "ar-internal.h"
 
+static void rxrpc_proto_abort(const char *why,
+                             struct rxrpc_call *call, rxrpc_seq_t seq)
+{
+       if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+               rxrpc_queue_call(call);
+       }
+}
+
 /*
- * queue a packet for recvmsg to pass to userspace
- * - the caller must hold a lock on call->lock
- * - must not be called with interrupts disabled (sk_filter() disables BH's)
- * - eats the packet whether successful or not
- * - there must be just one reference to the packet, which the caller passes to
- *   this function
+ * Apply a hard ACK by advancing the Tx window.
  */
-int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
-                       bool force, bool terminal)
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to)
 {
-       struct rxrpc_skb_priv *sp;
-       struct rxrpc_sock *rx = call->socket;
-       struct sock *sk;
-       int ret;
+       struct sk_buff *skb, *list = NULL;
+       int ix;
 
-       _enter(",,%d,%d", force, terminal);
+       spin_lock(&call->lock);
 
-       ASSERT(!irqs_disabled());
+       while (before(call->tx_hard_ack, to)) {
+               call->tx_hard_ack++;
+               ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
+               skb = call->rxtx_buffer[ix];
+               rxrpc_see_skb(skb);
+               call->rxtx_buffer[ix] = NULL;
+               call->rxtx_annotations[ix] = 0;
+               skb->next = list;
+               list = skb;
+       }
 
-       sp = rxrpc_skb(skb);
-       ASSERTCMP(sp->call, ==, call);
+       spin_unlock(&call->lock);
+
+       wake_up(&call->waitq);
 
-       /* if we've already posted the terminal message for a call, then we
-        * don't post any more */
-       if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
-               _debug("already terminated");
-               ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
+       while (list) {
+               skb = list;
+               list = skb->next;
+               skb->next = NULL;
                rxrpc_free_skb(skb);
-               return 0;
        }
+}
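
rxrpc_rotate_tx_window() indexes the ring with "seq & RXRPC_RXTX_BUFF_MASK", which works because the buffer size is a power of two, so consecutive sequence numbers wrap around the ring for free. A toy version with a hypothetical RING_SIZE:

	#include <linux/skbuff.h>
	#include <linux/types.h>

	#define RING_SIZE	64		/* must be a power of two */
	#define RING_MASK	(RING_SIZE - 1)

	static struct sk_buff *ring[RING_SIZE];

	/* seq 63 maps to slot 63, seq 64 wraps to slot 0, and so on. */
	static struct sk_buff *ring_slot(u32 seq)
	{
		return ring[seq & RING_MASK];
	}
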
 
-       sk = &rx->sk;
-
-       if (!force) {
-               /* cast skb->rcvbuf to unsigned...  It's pointless, but
-                * reduces number of warnings when compiling with -W
-                * --ANK */
-//             ret = -ENOBUFS;
-//             if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-//                 (unsigned int) sk->sk_rcvbuf)
-//                     goto out;
+/*
+ * End the transmission phase of a call.
+ *
+ * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
+ * or a final ACK packet.
+ */
+static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why)
+{
+       _enter("");
 
-               ret = sk_filter(sk, skb);
-               if (ret < 0)
-                       goto out;
+       switch (call->state) {
+       case RXRPC_CALL_CLIENT_RECV_REPLY:
+               return true;
+       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+       case RXRPC_CALL_SERVER_AWAIT_ACK:
+               break;
+       default:
+               rxrpc_proto_abort(abort_why, call, call->tx_top);
+               return false;
        }
 
-       spin_lock_bh(&sk->sk_receive_queue.lock);
-       if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
-           !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-           call->socket->sk.sk_state != RXRPC_CLOSE) {
-               skb->destructor = rxrpc_packet_destructor;
-               skb->dev = NULL;
-               skb->sk = sk;
-               atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-
-               if (terminal) {
-                       _debug("<<<< TERMINAL MESSAGE >>>>");
-                       set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
-               }
+       rxrpc_rotate_tx_window(call, call->tx_top);
 
-               /* allow interception by a kernel service */
-               if (rx->interceptor) {
-                       rx->interceptor(sk, call->user_call_ID, skb);
-                       spin_unlock_bh(&sk->sk_receive_queue.lock);
-               } else {
-                       _net("post skb %p", skb);
-                       __skb_queue_tail(&sk->sk_receive_queue, skb);
-                       spin_unlock_bh(&sk->sk_receive_queue.lock);
+       write_lock(&call->state_lock);
 
-                       if (!sock_flag(sk, SOCK_DEAD))
-                               sk->sk_data_ready(sk);
-               }
-               skb = NULL;
-       } else {
-               spin_unlock_bh(&sk->sk_receive_queue.lock);
+       switch (call->state) {
+       default:
+               break;
+       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+               call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+               break;
+       case RXRPC_CALL_SERVER_AWAIT_ACK:
+               __rxrpc_call_completed(call);
+               rxrpc_notify_socket(call);
+               break;
        }
-       ret = 0;
 
-out:
-       rxrpc_free_skb(skb);
+       write_unlock(&call->state_lock);
+       _leave(" = ok");
+       return true;
+}
 
-       _leave(" = %d", ret);
-       return ret;
+/*
+ * Scan a jumbo packet to validate its structure and to work out how many
+ * subpackets it contains.
+ *
+ * A jumbo packet is a collection of consecutive packets glued together with
+ * little headers between them that indicate how to change the initial header
+ * for each subpacket.
+ *
+ * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
+ * the last are RXRPC_JUMBO_DATALEN in size.  The last subpacket may be of any
+ * size.
+ */
+static bool rxrpc_validate_jumbo(struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int offset = sp->offset;
+       unsigned int len = skb->len;
+       int nr_jumbo = 1;
+       u8 flags = sp->hdr.flags;
+
+       do {
+               nr_jumbo++;
+               if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
+                       goto protocol_error;
+               if (flags & RXRPC_LAST_PACKET)
+                       goto protocol_error;
+               offset += RXRPC_JUMBO_DATALEN;
+               if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+                       goto protocol_error;
+               offset += sizeof(struct rxrpc_jumbo_header);
+       } while (flags & RXRPC_JUMBO_PACKET);
+
+       sp->nr_jumbo = nr_jumbo;
+       return true;
+
+protocol_error:
+       return false;
 }
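
To make the walk in rxrpc_validate_jumbo() concrete, this is the layout it expects for a two-subpacket jumbo (sizes symbolic; only the last subpacket may be short):

	/*
	 *   offset 0
	 *   +------------------------------+
	 *   | subpacket 0 data             |  RXRPC_JUMBO_DATALEN bytes
	 *   +------------------------------+
	 *   | jumbo header (flags, _rsvd)  |  sizeof(struct rxrpc_jumbo_header)
	 *   +------------------------------+
	 *   | subpacket 1 data             |  <= RXRPC_JUMBO_DATALEN bytes
	 *   +------------------------------+
	 *
	 * Subpacket 0 carries RXRPC_JUMBO_PACKET in the wire header; the
	 * flags byte in each jumbo header says whether another subpacket
	 * follows, and RXRPC_LAST_PACKET may only be set on the final one.
	 */
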
 
 /*
- * process a DATA packet, posting the packet to the appropriate queue
- * - eats the packet if successful
+ * Handle reception of a duplicate packet.
+ *
+ * We have to take care to avoid an attack here whereby we're given a series of
+ * jumbograms, each with a sequence number one before the preceding one and
+ * filled up to maximum UDP size.  If they never send us the first packet in
+ * the sequence, they can cause us to have to hold on to around 2MiB of kernel
+ * space until the call times out.
+ *
+ * We limit the space usage by only accepting three duplicate jumbo packets per
+ * call.  After that, we tell the other side we're no longer accepting jumbos
+ * (that information is encoded in the ACK packet).
  */
-static int rxrpc_fast_process_data(struct rxrpc_call *call,
-                                  struct sk_buff *skb, u32 seq)
+static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
+                                u8 annotation, bool *_jumbo_bad)
 {
-       struct rxrpc_skb_priv *sp;
-       bool terminal;
-       int ret, ackbit, ack;
-       u32 serial;
-       u8 flags;
+       /* Discard normal packets that are duplicates. */
+       if (annotation == 0)
+               return;
 
-       _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+       /* Skip jumbo subpackets that are duplicates.  When we've had three or
+        * more partially duplicate jumbo packets, we refuse to take any more
+        * jumbos for this call.
+        */
+       if (!*_jumbo_bad) {
+               call->nr_jumbo_bad++;
+               *_jumbo_bad = true;
+       }
+}
 
-       sp = rxrpc_skb(skb);
-       ASSERTCMP(sp->call, ==, NULL);
-       flags = sp->hdr.flags;
-       serial = sp->hdr.serial;
+/*
+ * Process a DATA packet, adding the packet to the Rx ring.
+ */
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
+                            u16 skew)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int offset = sp->offset;
+       unsigned int ix;
+       rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+       rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
+       bool immediate_ack = false, jumbo_bad = false, queued;
+       u16 len;
+       u8 ack = 0, flags, annotation = 0;
 
-       spin_lock(&call->lock);
+       _enter("{%u,%u},{%u,%u}",
+              call->rx_hard_ack, call->rx_top, skb->len, seq);
 
-       if (call->state > RXRPC_CALL_COMPLETE)
-               goto discard;
+       _proto("Rx DATA %%%u { #%u f=%02x }",
+              sp->hdr.serial, seq, sp->hdr.flags);
 
-       ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
-       ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
-       ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+       if (call->state >= RXRPC_CALL_COMPLETE)
+               return;
 
-       if (seq < call->rx_data_post) {
-               _debug("dup #%u [-%u]", seq, call->rx_data_post);
-               ack = RXRPC_ACK_DUPLICATE;
-               ret = -ENOBUFS;
-               goto discard_and_ack;
-       }
+       /* Received data implicitly ACKs all of the request packets we sent
+        * when we're acting as a client.
+        */
+       if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY &&
+           !rxrpc_end_tx_phase(call, "ETD"))
+               return;
 
-       /* we may already have the packet in the out of sequence queue */
-       ackbit = seq - (call->rx_data_eaten + 1);
-       ASSERTCMP(ackbit, >=, 0);
-       if (__test_and_set_bit(ackbit, call->ackr_window)) {
-               _debug("dup oos #%u [%u,%u]",
-                      seq, call->rx_data_eaten, call->rx_data_post);
-               ack = RXRPC_ACK_DUPLICATE;
-               goto discard_and_ack;
-       }
+       call->ackr_prev_seq = seq;
 
-       if (seq >= call->ackr_win_top) {
-               _debug("exceed #%u [%u]", seq, call->ackr_win_top);
-               __clear_bit(ackbit, call->ackr_window);
+       hard_ack = READ_ONCE(call->rx_hard_ack);
+       if (after(seq, hard_ack + call->rx_winsize)) {
                ack = RXRPC_ACK_EXCEEDS_WINDOW;
-               goto discard_and_ack;
+               ack_serial = serial;
+               goto ack;
        }
 
-       if (seq == call->rx_data_expect) {
-               clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
-               call->rx_data_expect++;
-       } else if (seq > call->rx_data_expect) {
-               _debug("oos #%u [%u]", seq, call->rx_data_expect);
-               call->rx_data_expect = seq + 1;
-               if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
-                       ack = RXRPC_ACK_OUT_OF_SEQUENCE;
-                       goto enqueue_and_ack;
+       flags = sp->hdr.flags;
+       if (flags & RXRPC_JUMBO_PACKET) {
+               if (call->nr_jumbo_bad > 3) {
+                       ack = RXRPC_ACK_NOSPACE;
+                       ack_serial = serial;
+                       goto ack;
                }
-               goto enqueue_packet;
+               annotation = 1;
        }
 
-       if (seq != call->rx_data_post) {
-               _debug("ahead #%u [%u]", seq, call->rx_data_post);
-               goto enqueue_packet;
+next_subpacket:
+       queued = false;
+       ix = seq & RXRPC_RXTX_BUFF_MASK;
+       len = skb->len;
+       if (flags & RXRPC_JUMBO_PACKET)
+               len = RXRPC_JUMBO_DATALEN;
+
+       if (flags & RXRPC_LAST_PACKET) {
+               if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+                   seq != call->rx_top)
+                       return rxrpc_proto_abort("LSN", call, seq);
+       } else {
+               if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+                   after_eq(seq, call->rx_top))
+                       return rxrpc_proto_abort("LSA", call, seq);
        }
 
-       if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
-               goto protocol_error;
-
-       /* if the packet need security things doing to it, then it goes down
-        * the slow path */
-       if (call->conn->security_ix)
-               goto enqueue_packet;
-
-       sp->call = call;
-       rxrpc_get_call(call);
-       atomic_inc(&call->skb_count);
-       terminal = ((flags & RXRPC_LAST_PACKET) &&
-                   !(flags & RXRPC_CLIENT_INITIATED));
-       ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
-       if (ret < 0) {
-               if (ret == -ENOMEM || ret == -ENOBUFS) {
-                       __clear_bit(ackbit, call->ackr_window);
-                       ack = RXRPC_ACK_NOSPACE;
-                       goto discard_and_ack;
+       if (before_eq(seq, hard_ack)) {
+               ack = RXRPC_ACK_DUPLICATE;
+               ack_serial = serial;
+               goto skip;
+       }
+
+       if (flags & RXRPC_REQUEST_ACK && !ack) {
+               ack = RXRPC_ACK_REQUESTED;
+               ack_serial = serial;
+       }
+
+       if (call->rxtx_buffer[ix]) {
+               rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
+               if (ack != RXRPC_ACK_DUPLICATE) {
+                       ack = RXRPC_ACK_DUPLICATE;
+                       ack_serial = serial;
                }
-               goto out;
+               immediate_ack = true;
+               goto skip;
        }
 
-       skb = NULL;
-       sp = NULL;
-
-       _debug("post #%u", seq);
-       ASSERTCMP(call->rx_data_post, ==, seq);
-       call->rx_data_post++;
-
-       if (flags & RXRPC_LAST_PACKET)
-               set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
-
-       /* if we've reached an out of sequence packet then we need to drain
-        * that queue into the socket Rx queue now */
-       if (call->rx_data_post == call->rx_first_oos) {
-               _debug("drain rx oos now");
-               read_lock(&call->state_lock);
-               if (call->state < RXRPC_CALL_COMPLETE &&
-                   !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events))
-                       rxrpc_queue_call(call);
-               read_unlock(&call->state_lock);
+       /* Queue the packet.  We use a couple of memory barriers here as we need
+        * to make sure that rx_top is perceived to be set after the buffer
+        * pointer and that the buffer pointer is set after the annotation and
+        * the skb data.
+        *
+        * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
+        * and also rxrpc_fill_out_ack().
+        */
+       rxrpc_get_skb(skb);
+       call->rxtx_annotations[ix] = annotation;
+       smp_wmb();
+       call->rxtx_buffer[ix] = skb;
+       if (after(seq, call->rx_top))
+               smp_store_release(&call->rx_top, seq);
+       queued = true;
+
+       if (after_eq(seq, call->rx_expect_next)) {
+               if (after(seq, call->rx_expect_next)) {
+                       _net("OOS %u > %u", seq, call->rx_expect_next);
+                       ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+                       ack_serial = serial;
+               }
+               call->rx_expect_next = seq + 1;
        }
 
-       spin_unlock(&call->lock);
-       atomic_inc(&call->ackr_not_idle);
-       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
-       _leave(" = 0 [posted]");
-       return 0;
+skip:
+       offset += len;
+       if (flags & RXRPC_JUMBO_PACKET) {
+               if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+                       return rxrpc_proto_abort("XJF", call, seq);
+               offset += sizeof(struct rxrpc_jumbo_header);
+               seq++;
+               serial++;
+               annotation++;
+               if (flags & RXRPC_JUMBO_PACKET)
+                       annotation |= RXRPC_RX_ANNO_JLAST;
+               if (after(seq, hard_ack + call->rx_winsize)) {
+                       ack = RXRPC_ACK_EXCEEDS_WINDOW;
+                       ack_serial = serial;
+                       if (!jumbo_bad) {
+                               call->nr_jumbo_bad++;
+                               jumbo_bad = true;
+                       }
+                       goto ack;
+               }
 
-protocol_error:
-       ret = -EBADMSG;
-out:
-       spin_unlock(&call->lock);
-       _leave(" = %d", ret);
-       return ret;
+               _proto("Rx DATA Jumbo %%%u", serial);
+               goto next_subpacket;
+       }
 
-discard_and_ack:
-       _debug("discard and ACK packet %p", skb);
-       __rxrpc_propose_ACK(call, ack, serial, true);
-discard:
-       spin_unlock(&call->lock);
-       rxrpc_free_skb(skb);
-       _leave(" = 0 [discarded]");
-       return 0;
+       if (queued && flags & RXRPC_LAST_PACKET && !ack) {
+               ack = RXRPC_ACK_DELAY;
+               ack_serial = serial;
+       }
 
-enqueue_and_ack:
-       __rxrpc_propose_ACK(call, ack, serial, true);
-enqueue_packet:
-       _net("defer skb %p", skb);
-       spin_unlock(&call->lock);
-       skb_queue_tail(&call->rx_queue, skb);
-       atomic_inc(&call->ackr_not_idle);
-       read_lock(&call->state_lock);
-       if (call->state < RXRPC_CALL_DEAD)
-               rxrpc_queue_call(call);
-       read_unlock(&call->state_lock);
-       _leave(" = 0 [queued]");
-       return 0;
+ack:
+       if (ack)
+               rxrpc_propose_ACK(call, ack, skew, ack_serial,
+                                 immediate_ack, true);
+
+       if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
+               rxrpc_notify_socket(call);
+       _leave(" [queued]");
 }
 
 /*
- * assume an implicit ACKALL of the transmission phase of a client socket upon
- * reception of the first reply packet
+ * Process the extra information that may be appended to an ACK packet
  */
-static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+                               struct rxrpc_ackinfo *ackinfo)
 {
-       write_lock_bh(&call->state_lock);
-
-       switch (call->state) {
-       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-               call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
-               call->acks_latest = serial;
-
-               _debug("implicit ACKALL %%%u", call->acks_latest);
-               set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events);
-               write_unlock_bh(&call->state_lock);
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       struct rxrpc_peer *peer;
+       unsigned int mtu;
+       u32 rwind = ntohl(ackinfo->rwind);
+
+       _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
+              sp->hdr.serial,
+              ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
+              rwind, ntohl(ackinfo->jumbo_max));
+
+       if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+               rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+       call->tx_winsize = rwind;
+
+       mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+
+       peer = call->peer;
+       if (mtu < peer->maxdata) {
+               spin_lock_bh(&peer->lock);
+               peer->maxdata = mtu;
+               peer->mtu = mtu + peer->hdrsize;
+               spin_unlock_bh(&peer->lock);
+               _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+       }
+}
 
-               if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
-                       clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-                       clear_bit(RXRPC_CALL_EV_RESEND, &call->events);
-                       clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+/*
+ * Process individual soft ACKs.
+ *
+ * Each ACK in the array corresponds to one packet and can be either an ACK or
+ * a NAK.  If we find an explicitly NAK'd packet we resend immediately;
+ * packets that lie beyond the end of the ACK list are scheduled for resend by
+ * the timer on the basis that the peer might just not have processed them at
+ * the time the ACK was sent.
+ */
+static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
+                                 rxrpc_seq_t seq, int nr_acks)
+{
+       bool resend = false;
+       int ix;
+
+       for (; nr_acks > 0; nr_acks--, seq++) {
+               ix = seq & RXRPC_RXTX_BUFF_MASK;
+               switch (*acks) {
+               case RXRPC_ACK_TYPE_ACK:
+                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK;
+                       break;
+               case RXRPC_ACK_TYPE_NACK:
+                       if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)
+                               continue;
+                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK;
+                       resend = true;
+                       break;
+               default:
+                       return rxrpc_proto_abort("SFT", call, 0);
                }
-               break;
-
-       default:
-               write_unlock_bh(&call->state_lock);
-               break;
        }
+
+       if (resend &&
+           !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+               rxrpc_queue_call(call);
 }
 
 /*
- * post an incoming packet to the nominated call to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * Process an ACK packet.
+ *
+ * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
+ * in the ACK array.  Anything before that is hard-ACK'd and may be discarded.
+ *
+ * A hard-ACK means that a packet has been processed and may be discarded; a
+ * soft-ACK means that the packet was received but may yet be discarded by
+ * the peer, with its retransmission requested.  A phase is complete when all
+ * packets are hard-ACK'd.
  */
-void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
+                           u16 skew)
 {
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       __be32 wtmp;
-       u32 hi_serial, abort_code;
-
-       _enter("%p,%p", call, skb);
-
-       ASSERT(!irqs_disabled());
-
-#if 0 // INJECT RX ERROR
-       if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
-               static int skip = 0;
-               if (++skip == 3) {
-                       printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
-                       skip = 0;
-                       goto free_packet;
-               }
+       union {
+               struct rxrpc_ackpacket ack;
+               struct rxrpc_ackinfo info;
+               u8 acks[RXRPC_MAXACKS];
+       } buf;
+       rxrpc_seq_t first_soft_ack, hard_ack;
+       int nr_acks, offset;
+
+       _enter("");
+
+       if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) {
+               _debug("extraction failure");
+               return rxrpc_proto_abort("XAK", call, 0);
        }
-#endif
-
-       /* track the latest serial number on this connection for ACK packet
-        * information */
-       hi_serial = atomic_read(&call->conn->hi_serial);
-       while (sp->hdr.serial > hi_serial)
-               hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
-                                          sp->hdr.serial);
-
-       /* request ACK generation for any ACK or DATA packet that requests
-        * it */
-       if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
-               _proto("ACK Requested on %%%u", sp->hdr.serial);
-               rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
+       sp->offset += sizeof(buf.ack);
+
+       first_soft_ack = ntohl(buf.ack.firstPacket);
+       hard_ack = first_soft_ack - 1;
+       nr_acks = buf.ack.nAcks;
+
+       _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+              sp->hdr.serial,
+              ntohs(buf.ack.maxSkew),
+              first_soft_ack,
+              ntohl(buf.ack.previousPacket),
+              ntohl(buf.ack.serial),
+              rxrpc_acks(buf.ack.reason),
+              buf.ack.nAcks);
+
+       if (buf.ack.reason == RXRPC_ACK_PING) {
+               _proto("Rx ACK %%%u PING Request", sp->hdr.serial);
+               rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+                                 skew, sp->hdr.serial, true, true);
+       } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+               rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
+                                 skew, sp->hdr.serial, true, true);
        }
 
-       switch (sp->hdr.type) {
-       case RXRPC_PACKET_TYPE_ABORT:
-               _debug("abort");
-
-               if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
-                       goto protocol_error;
-
-               abort_code = ntohl(wtmp);
-               _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
-
-               write_lock_bh(&call->state_lock);
-               if (call->state < RXRPC_CALL_COMPLETE) {
-                       call->state = RXRPC_CALL_REMOTELY_ABORTED;
-                       call->remote_abort = abort_code;
-                       set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-                       rxrpc_queue_call(call);
-               }
-               goto free_packet_unlock;
-
-       case RXRPC_PACKET_TYPE_BUSY:
-               _proto("Rx BUSY %%%u", sp->hdr.serial);
-
-               if (rxrpc_conn_is_service(call->conn))
-                       goto protocol_error;
+       offset = sp->offset + nr_acks + 3;
+       if (skb->len >= offset + sizeof(buf.info)) {
+               if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0)
+                       return rxrpc_proto_abort("XAI", call, 0);
+               rxrpc_input_ackinfo(call, skb, &buf.info);
+       }
 
-               write_lock_bh(&call->state_lock);
-               switch (call->state) {
-               case RXRPC_CALL_CLIENT_SEND_REQUEST:
-                       call->state = RXRPC_CALL_SERVER_BUSY;
-                       set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
-                       rxrpc_queue_call(call);
-               case RXRPC_CALL_SERVER_BUSY:
-                       goto free_packet_unlock;
-               default:
-                       goto protocol_error_locked;
-               }
+       if (first_soft_ack == 0)
+               return rxrpc_proto_abort("AK0", call, 0);
 
+       /* Ignore ACKs unless we are or have just been transmitting. */
+       switch (call->state) {
+       case RXRPC_CALL_CLIENT_SEND_REQUEST:
+       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+       case RXRPC_CALL_SERVER_SEND_REPLY:
+       case RXRPC_CALL_SERVER_AWAIT_ACK:
+               break;
        default:
-               _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
-               goto protocol_error;
-
-       case RXRPC_PACKET_TYPE_DATA:
-               _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-               if (sp->hdr.seq == 0)
-                       goto protocol_error;
-
-               call->ackr_prev_seq = sp->hdr.seq;
+               return;
+       }
 
-               /* received data implicitly ACKs all of the request packets we
-                * sent when we're acting as a client */
-               if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
-                       rxrpc_assume_implicit_ackall(call, sp->hdr.serial);
+       /* Discard any out-of-order or duplicate ACKs. */
+       if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) {
+               _debug("discard ACK %d <= %d",
+                      sp->hdr.serial, call->acks_latest);
+               return;
+       }
+       call->acks_latest = sp->hdr.serial;
 
-               switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) {
-               case 0:
-                       skb = NULL;
-                       goto done;
+       if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
+           hard_ack == call->tx_top) {
+               rxrpc_end_tx_phase(call, "ETA");
+               return;
+       }
 
-               default:
-                       BUG();
+       if (before(hard_ack, call->tx_hard_ack) ||
+           after(hard_ack, call->tx_top))
+               return rxrpc_proto_abort("AKW", call, 0);
 
-                       /* data packet received beyond the last packet */
-               case -EBADMSG:
-                       goto protocol_error;
-               }
+       if (after(hard_ack, call->tx_hard_ack))
+               rxrpc_rotate_tx_window(call, hard_ack);
 
-       case RXRPC_PACKET_TYPE_ACKALL:
-       case RXRPC_PACKET_TYPE_ACK:
-               /* ACK processing is done in process context */
-               read_lock_bh(&call->state_lock);
-               if (call->state < RXRPC_CALL_DEAD) {
-                       skb_queue_tail(&call->rx_queue, skb);
-                       rxrpc_queue_call(call);
-                       skb = NULL;
-               }
-               read_unlock_bh(&call->state_lock);
-               goto free_packet;
-       }
+       if (after(first_soft_ack, call->tx_top))
+               return;
 
-protocol_error:
-       _debug("protocol error");
-       write_lock_bh(&call->state_lock);
-protocol_error_locked:
-       if (call->state <= RXRPC_CALL_COMPLETE) {
-               call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               call->local_abort = RX_PROTOCOL_ERROR;
-               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-               rxrpc_queue_call(call);
-       }
-free_packet_unlock:
-       write_unlock_bh(&call->state_lock);
-free_packet:
-       rxrpc_free_skb(skb);
-done:
-       _leave("");
+       if (nr_acks > call->tx_top - first_soft_ack + 1)
+               nr_acks = call->tx_top - first_soft_ack + 1;
+       if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0)
+               return rxrpc_proto_abort("XSA", call, 0);
+       rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks);
 }
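/*
 * Editor's sketch, not part of the patch: rxrpc_input_ack() clamps nr_acks
 * to the in-flight window before walking the copied soft-ACK vector.  A
 * stand-alone user-space analogue of that walk, with hypothetical ACK-type
 * values:
 */
#include <stdint.h>
#include <stdio.h>

#define ACK_TYPE_ACK  1
#define ACK_TYPE_NACK 2

static void walk_soft_acks(const uint8_t *acks, uint32_t first_soft_ack,
			   uint32_t tx_top, uint32_t nr_acks)
{
	/* never read past the packets actually awaiting hard-ACK */
	if (nr_acks > tx_top - first_soft_ack + 1)
		nr_acks = tx_top - first_soft_ack + 1;

	for (uint32_t i = 0; i < nr_acks; i++) {
		if (acks[i] == ACK_TYPE_ACK)
			printf("seq %u soft-acked\n", first_soft_ack + i);
		else
			printf("seq %u nacked, resend candidate\n",
			       first_soft_ack + i);
	}
}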
 
 /*
- * split up a jumbo data packet
+ * Process an ACKALL packet.
  */
-static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
-                                      struct sk_buff *jumbo)
+static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
 {
-       struct rxrpc_jumbo_header jhdr;
-       struct rxrpc_skb_priv *sp;
-       struct sk_buff *part;
-
-       _enter(",{%u,%u}", jumbo->data_len, jumbo->len);
-
-       sp = rxrpc_skb(jumbo);
-
-       do {
-               sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
-
-               /* make a clone to represent the first subpacket in what's left
-                * of the jumbo packet */
-               part = skb_clone(jumbo, GFP_ATOMIC);
-               if (!part) {
-                       /* simply ditch the tail in the event of ENOMEM */
-                       pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
-                       break;
-               }
-               rxrpc_new_skb(part);
-
-               pskb_trim(part, RXRPC_JUMBO_DATALEN);
-
-               if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
-                       goto protocol_error;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-               if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
-                       goto protocol_error;
-               if (!pskb_pull(jumbo, sizeof(jhdr)))
-                       BUG();
+       _proto("Rx ACKALL %%%u", sp->hdr.serial);
 
-               sp->hdr.seq     += 1;
-               sp->hdr.serial  += 1;
-               sp->hdr.flags   = jhdr.flags;
-               sp->hdr._rsvd   = ntohs(jhdr._rsvd);
+       rxrpc_end_tx_phase(call, "ETL");
+}
 
-               _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
+/*
+ * Process an ABORT packet.
+ */
+static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       __be32 wtmp;
+       u32 abort_code = RX_CALL_DEAD;
 
-               rxrpc_fast_process_packet(call, part);
-               part = NULL;
+       _enter("");
 
-       } while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+       if (skb->len >= 4 &&
+           skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0)
+               abort_code = ntohl(wtmp);
 
-       rxrpc_fast_process_packet(call, jumbo);
-       _leave("");
-       return;
+       _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
 
-protocol_error:
-       _debug("protocol error");
-       rxrpc_free_skb(part);
-       rxrpc_free_skb(jumbo);
-       write_lock_bh(&call->state_lock);
-       if (call->state <= RXRPC_CALL_COMPLETE) {
-               call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               call->local_abort = RX_PROTOCOL_ERROR;
-               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-               rxrpc_queue_call(call);
-       }
-       write_unlock_bh(&call->state_lock);
-       _leave("");
+       if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+                                     abort_code, ECONNABORTED))
+               rxrpc_notify_socket(call);
 }
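/*
 * Editor's sketch, not part of the patch: the ABORT body is optional, so
 * the code above starts from a default of RX_CALL_DEAD and only overrides
 * it when four bytes can really be copied out of the packet.  The same
 * defensive pattern over a plain byte buffer:
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static uint32_t extract_abort_code(const uint8_t *pkt, size_t len,
				   size_t offset, uint32_t dflt)
{
	uint32_t wtmp;

	if (len < offset + sizeof(wtmp))
		return dflt;		/* truncated body: keep the default */
	memcpy(&wtmp, pkt + offset, sizeof(wtmp));
	return ntohl(wtmp);		/* abort codes are big-endian on the wire */
}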
 
 /*
- * post an incoming packet to the appropriate call/socket to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * Process an incoming call packet.
  */
-static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
-                                     struct sk_buff *skb)
+static void rxrpc_input_call_packet(struct rxrpc_call *call,
+                                   struct sk_buff *skb, u16 skew)
 {
-       struct rxrpc_skb_priv *sp;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
        _enter("%p,%p", call, skb);
 
-       sp = rxrpc_skb(skb);
-
-       _debug("extant call [%d]", call->state);
+       switch (sp->hdr.type) {
+       case RXRPC_PACKET_TYPE_DATA:
+               rxrpc_input_data(call, skb, skew);
+               break;
 
-       read_lock(&call->state_lock);
-       switch (call->state) {
-       case RXRPC_CALL_LOCALLY_ABORTED:
-               if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
-                       rxrpc_queue_call(call);
-                       goto free_unlock;
-               }
-       case RXRPC_CALL_REMOTELY_ABORTED:
-       case RXRPC_CALL_NETWORK_ERROR:
-       case RXRPC_CALL_DEAD:
-               goto dead_call;
-       case RXRPC_CALL_COMPLETE:
-       case RXRPC_CALL_CLIENT_FINAL_ACK:
-               /* complete server call */
-               if (rxrpc_conn_is_service(call->conn))
-                       goto dead_call;
-               /* resend last packet of a completed call */
-               _debug("final ack again");
-               rxrpc_get_call(call);
-               set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
-               rxrpc_queue_call(call);
-               goto free_unlock;
-       default:
+       case RXRPC_PACKET_TYPE_ACK:
+               rxrpc_input_ack(call, skb, skew);
                break;
-       }
 
-       read_unlock(&call->state_lock);
-       rxrpc_get_call(call);
+       case RXRPC_PACKET_TYPE_BUSY:
+               _proto("Rx BUSY %%%u", sp->hdr.serial);
 
-       if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-           sp->hdr.flags & RXRPC_JUMBO_PACKET)
-               rxrpc_process_jumbo_packet(call, skb);
-       else
-               rxrpc_fast_process_packet(call, skb);
+               /* Just ignore BUSY packets from the server; the retry and
+                * lifespan timers will take care of business.  BUSY packets
+                * from the client don't make sense.
+                */
+               break;
+
+       case RXRPC_PACKET_TYPE_ABORT:
+               rxrpc_input_abort(call, skb);
+               break;
 
-       rxrpc_put_call(call);
-       goto done;
+       case RXRPC_PACKET_TYPE_ACKALL:
+               rxrpc_input_ackall(call, skb);
+               break;
 
-dead_call:
-       if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
-               skb->priority = RX_CALL_DEAD;
-               rxrpc_reject_packet(call->conn->params.local, skb);
-               goto unlock;
+       default:
+               _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
+               break;
        }
-free_unlock:
-       rxrpc_free_skb(skb);
-unlock:
-       read_unlock(&call->state_lock);
-done:
+
        _leave("");
 }
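/*
 * Editor's sketch, not part of the patch: rxrpc_input_call_packet() is a
 * plain switch on the packet type.  Where the type space is small and
 * dense, the same dispatch is often written as a handler table; a
 * hypothetical user-space variant:
 */
#include <stdint.h>

struct pkt;				/* opaque packet, for illustration */
typedef void (*pkt_handler_t)(struct pkt *);

static void input_data(struct pkt *p)  { (void)p; /* consume DATA */ }
static void input_ack(struct pkt *p)   { (void)p; /* consume ACK */ }
static void input_abort(struct pkt *p) { (void)p; /* consume ABORT */ }

static const pkt_handler_t handlers[] = {
	[1] = input_data,		/* hypothetical DATA type value */
	[2] = input_ack,		/* hypothetical ACK type value */
	[4] = input_abort,		/* hypothetical ABORT type value */
};

static void dispatch(struct pkt *p, uint8_t type)
{
	if (type < sizeof(handlers) / sizeof(handlers[0]) && handlers[type])
		handlers[type](p);	/* unknown types are ignored, as above */
}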
 
 /*
  * post connection-level events to the connection
- * - this includes challenges, responses and some aborts
+ * - this includes challenges, responses, some aborts and retransmission of
+ *   a call's terminal packet.
  */
 static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
                                      struct sk_buff *skb)
@@ -594,6 +611,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
        rxrpc_queue_local(local);
 }
 
+/*
+ * put a packet up for transport-level abort
+ */
+static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+       CHECK_SLAB_OKAY(&local->usage);
+
+       skb_queue_tail(&local->reject_queue, skb);
+       rxrpc_queue_local(local);
+}
+
 /*
  * Extract the wire header from a packet and translate the byte order.
  */
@@ -605,8 +633,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
        /* dig out the RxRPC connection details */
        if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
                return -EBADMSG;
-       if (!pskb_pull(skb, sizeof(whdr)))
-               BUG();
 
        memset(sp, 0, sizeof(*sp));
        sp->hdr.epoch           = ntohl(whdr.epoch);
@@ -620,6 +646,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
        sp->hdr.securityIndex   = whdr.securityIndex;
        sp->hdr._rsvd           = ntohs(whdr._rsvd);
        sp->hdr.serviceId       = ntohs(whdr.serviceId);
+       sp->offset = sizeof(whdr);
        return 0;
 }
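/*
 * Editor's sketch, not part of the patch: rxrpc_extract_header() now
 * leaves the wire header in the buffer and records where the payload
 * starts (sp->offset) instead of pulling the header off the skb.  The
 * byte-order translation itself, over a flat buffer and a hypothetical
 * header layout:
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct wire_hdr {			/* big-endian on the wire */
	uint32_t epoch, cid, call_number, seq, serial;
	uint8_t  type, flags, user_status, security_index;
	uint16_t _rsvd, service_id;
};

static int parse_header(const uint8_t *buf, size_t len,
			struct wire_hdr *h, size_t *offset)
{
	if (len < sizeof(*h))
		return -1;		/* -EBADMSG in the kernel version */
	memcpy(h, buf, sizeof(*h));
	h->epoch       = ntohl(h->epoch);	/* translate in place */
	h->cid         = ntohl(h->cid);
	h->call_number = ntohl(h->call_number);
	h->seq         = ntohl(h->seq);
	h->serial      = ntohl(h->serial);
	h->_rsvd       = ntohs(h->_rsvd);
	h->service_id  = ntohs(h->service_id);
	*offset = sizeof(*h);		/* payload starts here, like sp->offset */
	return 0;
}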
 
@@ -631,19 +658,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
  * shut down and the local endpoint from going away, thus sk_user_data will not
  * be cleared until this function returns.
  */
-void rxrpc_data_ready(struct sock *sk)
+void rxrpc_data_ready(struct sock *udp_sk)
 {
        struct rxrpc_connection *conn;
+       struct rxrpc_channel *chan;
+       struct rxrpc_call *call;
        struct rxrpc_skb_priv *sp;
-       struct rxrpc_local *local = sk->sk_user_data;
+       struct rxrpc_local *local = udp_sk->sk_user_data;
        struct sk_buff *skb;
-       int ret;
+       unsigned int channel;
+       int ret, skew;
 
-       _enter("%p", sk);
+       _enter("%p", udp_sk);
 
        ASSERT(!irqs_disabled());
 
-       skb = skb_recv_datagram(sk, 0, 1, &ret);
+       skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
        if (!skb) {
                if (ret == -EAGAIN)
                        return;
@@ -677,6 +707,7 @@ void rxrpc_data_ready(struct sock *sk)
        /* dig out the RxRPC connection details */
        if (rxrpc_extract_header(sp, skb) < 0)
                goto bad_message;
+       trace_rxrpc_rx_packet(sp);
 
        _net("Rx RxRPC %s ep=%x call=%x:%x",
             sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
@@ -688,70 +719,124 @@ void rxrpc_data_ready(struct sock *sk)
                goto bad_message;
        }
 
-       if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+       switch (sp->hdr.type) {
+       case RXRPC_PACKET_TYPE_VERSION:
                rxrpc_post_packet_to_local(local, skb);
                goto out;
-       }
 
-       if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-           (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
-               goto bad_message;
+       case RXRPC_PACKET_TYPE_BUSY:
+               if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+                       goto discard;
+
+               /* Fall through: BUSY shares the DATA sanity checks. */
+       case RXRPC_PACKET_TYPE_DATA:
+               if (sp->hdr.callNumber == 0)
+                       goto bad_message;
+               if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
+                   !rxrpc_validate_jumbo(skb))
+                       goto bad_message;
+               break;
+       }
 
        rcu_read_lock();
 
        conn = rxrpc_find_connection_rcu(local, skb);
-       if (!conn)
-               goto cant_route_call;
+       if (conn) {
+               if (sp->hdr.securityIndex != conn->security_ix)
+                       goto wrong_security;
+
+               if (sp->hdr.callNumber == 0) {
+                       /* Connection-level packet */
+                       _debug("CONN %p {%d}", conn, conn->debug_id);
+                       rxrpc_post_packet_to_conn(conn, skb);
+                       goto out_unlock;
+               }
+
+               /* Note the serial number skew here */
+               skew = (int)sp->hdr.serial - (int)conn->hi_serial;
+               if (skew >= 0) {
+                       if (skew > 0)
+                               conn->hi_serial = sp->hdr.serial;
+               } else {
+                       skew = -skew;
+                       skew = min(skew, 65535);
+               }
 
-       if (sp->hdr.callNumber == 0) {
-               /* Connection-level packet */
-               _debug("CONN %p {%d}", conn, conn->debug_id);
-               rxrpc_post_packet_to_conn(conn, skb);
-       } else {
                /* Call-bound packets are routed by connection channel. */
-               unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
-               struct rxrpc_channel *chan = &conn->channels[channel];
-               struct rxrpc_call *call = rcu_dereference(chan->call);
+               channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+               chan = &conn->channels[channel];
+
+               /* Ignore really old calls */
+               if (sp->hdr.callNumber < chan->last_call)
+                       goto discard_unlock;
+
+               if (sp->hdr.callNumber == chan->last_call) {
+                       /* For the previous service call, if completed successfully, we
+                        * discard all further packets.
+                        */
+                       if (rxrpc_conn_is_service(conn) &&
+                           (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
+                            sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+                               goto discard_unlock;
+
+                       /* But otherwise we need to retransmit the final packet from
+                        * data cached in the connection record.
+                        */
+                       rxrpc_post_packet_to_conn(conn, skb);
+                       goto out_unlock;
+               }
 
-               if (!call || atomic_read(&call->usage) == 0)
-                       goto cant_route_call;
+               call = rcu_dereference(chan->call);
+       } else {
+               skew = 0;
+               call = NULL;
+       }
 
-               rxrpc_post_packet_to_call(call, skb);
+       if (!call || atomic_read(&call->usage) == 0) {
+               if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED) ||
+                   sp->hdr.callNumber == 0 ||
+                   sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+                       goto bad_message_unlock;
+               if (sp->hdr.seq != 1)
+                       goto discard_unlock;
+               call = rxrpc_new_incoming_call(local, conn, skb);
+               if (!call) {
+                       rcu_read_unlock();
+                       goto reject_packet;
+               }
        }
 
+       rxrpc_input_call_packet(call, skb, skew);
+       goto discard_unlock;
+
+discard_unlock:
        rcu_read_unlock();
+discard:
+       rxrpc_free_skb(skb);
 out:
+       trace_rxrpc_rx_done(0, 0);
        return;
 
-cant_route_call:
+out_unlock:
        rcu_read_unlock();
+       goto out;
 
-       _debug("can't route call");
-       if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
-           sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
-               if (sp->hdr.seq == 1) {
-                       _debug("first packet");
-                       skb_queue_tail(&local->accept_queue, skb);
-                       rxrpc_queue_work(&local->processor);
-                       _leave(" [incoming]");
-                       return;
-               }
-               skb->priority = RX_INVALID_OPERATION;
-       } else {
-               skb->priority = RX_CALL_DEAD;
-       }
-
-       if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
-               _debug("reject type %d",sp->hdr.type);
-               rxrpc_reject_packet(local, skb);
-       } else {
-               rxrpc_free_skb(skb);
-       }
-       _leave(" [no call]");
-       return;
+wrong_security:
+       rcu_read_unlock();
+       trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+                         RXKADINCONSISTENCY, EBADMSG);
+       skb->priority = RXKADINCONSISTENCY;
+       goto post_abort;
 
+bad_message_unlock:
+       rcu_read_unlock();
 bad_message:
+       trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+                         RX_PROTOCOL_ERROR, EBADMSG);
        skb->priority = RX_PROTOCOL_ERROR;
+post_abort:
+       skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+reject_packet:
+       trace_rxrpc_rx_done(skb->mark, skb->priority);
        rxrpc_reject_packet(local, skb);
        _leave(" [badmsg]");
 }
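/*
 * Editor's sketch, not part of the patch: the "serial number skew" noted
 * in rxrpc_data_ready() is the signed distance between a packet's serial
 * and the highest serial yet seen on the connection.  Computing it as a
 * signed difference keeps it meaningful across 32-bit wraparound; the
 * negative case is folded and capped at 65535 so it fits the 16-bit
 * maxSkew field of an ACK.  In isolation:
 */
#include <stdint.h>

static unsigned int serial_skew(uint32_t serial, uint32_t *hi_serial)
{
	int skew = (int)(serial - *hi_serial);	/* wrap-safe signed delta */

	if (skew >= 0) {
		if (skew > 0)
			*hi_serial = serial;	/* new high-water mark */
		return (unsigned int)skew;
	}
	skew = -skew;
	return skew > 65535 ? 65535 : (unsigned int)skew;
}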
index c21ad21..7d4375e 100644 (file)
@@ -23,31 +23,36 @@ static int none_prime_packet_security(struct rxrpc_connection *conn)
 }
 
 static int none_secure_packet(struct rxrpc_call *call,
-                              struct sk_buff *skb,
-                              size_t data_size,
-                              void *sechdr)
+                             struct sk_buff *skb,
+                             size_t data_size,
+                             void *sechdr)
 {
        return 0;
 }
 
-static int none_verify_packet(struct rxrpc_call *call,
-                              struct sk_buff *skb,
-                              u32 *_abort_code)
+static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+                             unsigned int offset, unsigned int len,
+                             rxrpc_seq_t seq, u16 expected_cksum)
 {
        return 0;
 }
 
+static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+                            unsigned int *_offset, unsigned int *_len)
+{
+}
+
 static int none_respond_to_challenge(struct rxrpc_connection *conn,
-                                     struct sk_buff *skb,
-                                     u32 *_abort_code)
+                                    struct sk_buff *skb,
+                                    u32 *_abort_code)
 {
        *_abort_code = RX_PROTOCOL_ERROR;
        return -EPROTO;
 }
 
 static int none_verify_response(struct rxrpc_connection *conn,
-                                struct sk_buff *skb,
-                                u32 *_abort_code)
+                               struct sk_buff *skb,
+                               u32 *_abort_code)
 {
        *_abort_code = RX_PROTOCOL_ERROR;
        return -EPROTO;
@@ -78,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = {
        .prime_packet_security          = none_prime_packet_security,
        .secure_packet                  = none_secure_packet,
        .verify_packet                  = none_verify_packet,
+       .locate_data                    = none_locate_data,
        .respond_to_challenge           = none_respond_to_challenge,
        .verify_response                = none_verify_response,
        .clear                          = none_clear,
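/*
 * Editor's sketch, not part of the patch: rxrpc_no_security fills in every
 * slot of the ops table, including the new locate_data hook, so callers
 * can invoke conn->security->op() without NULL checks.  The general shape
 * of such an interface struct, with hypothetical names:
 */
#include <stddef.h>

struct buffer;				/* opaque, for illustration */

struct sec_ops {
	int  (*secure)(struct buffer *b);
	int  (*verify)(struct buffer *b);
	void (*locate_data)(struct buffer *b, size_t *off, size_t *len);
};

static int noop_secure(struct buffer *b) { (void)b; return 0; }
static int noop_verify(struct buffer *b) { (void)b; return 0; }
static void noop_locate(struct buffer *b, size_t *off, size_t *len)
{
	(void)b; (void)off; (void)len;	/* plain text: data is where it lies */
}

static const struct sec_ops no_security = {
	.secure      = noop_secure,
	.verify      = noop_verify,
	.locate_data = noop_locate,	/* every slot filled, never NULL */
};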
index 31a3f86..f073e93 100644 (file)
@@ -15,8 +15,6 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <generated/utsrelease.h>
@@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 {
        struct rxrpc_wire_header whdr;
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct sockaddr_in sin;
+       struct sockaddr_rxrpc srx;
        struct msghdr msg;
        struct kvec iov[2];
        size_t len;
@@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 
        _enter("");
 
-       sin.sin_family = AF_INET;
-       sin.sin_port = udp_hdr(skb)->source;
-       sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+       if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+               return;
 
-       msg.msg_name    = &sin;
-       msg.msg_namelen = sizeof(sin);
+       msg.msg_name    = &srx.transport;
+       msg.msg_namelen = srx.transport_len;
        msg.msg_control = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags   = 0;
@@ -93,11 +90,12 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
        if (skb) {
                struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
+               rxrpc_see_skb(skb);
                _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
 
                switch (sp->hdr.type) {
                case RXRPC_PACKET_TYPE_VERSION:
-                       if (skb_copy_bits(skb, 0, &v, 1) < 0)
+                       if (skb_copy_bits(skb, sp->offset, &v, 1) < 0)
                                return;
                        _proto("Rx VERSION { %02x }", v);
                        if (v == 0)
index a753796..e3fad80 100644 (file)
@@ -58,6 +58,17 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
                        memcmp(&local->srx.transport.sin.sin_addr,
                               &srx->transport.sin.sin_addr,
                               sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               /* If the choice of UDP6 port is left up to the transport, then
+                * the endpoint record doesn't match.
+                */
+               return ((u16 __force)local->srx.transport.sin6.sin6_port -
+                       (u16 __force)srx->transport.sin6.sin6_port) ?:
+                       memcmp(&local->srx.transport.sin6.sin6_addr,
+                              &srx->transport.sin6.sin6_addr,
+                              sizeof(struct in6_addr));
+#endif
        default:
                BUG();
        }
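/*
 * Editor's sketch, not part of the patch: the comparator above chains the
 * port difference with memcmp() of the address via the GNU "?:" extension
 * (which the kernel itself uses), so the first non-zero difference decides
 * the ordering.  A user-space analogue for sockaddr_in6:
 */
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static long cmp_in6(const struct sockaddr_in6 *a,
		    const struct sockaddr_in6 *b)
{
	/* port difference first, then fall back to the 128-bit address */
	return ((long)ntohs(a->sin6_port) - (long)ntohs(b->sin6_port)) ?:
		memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(struct in6_addr));
}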
@@ -75,9 +86,8 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
                atomic_set(&local->usage, 1);
                INIT_LIST_HEAD(&local->link);
                INIT_WORK(&local->processor, rxrpc_local_processor);
-               INIT_LIST_HEAD(&local->services);
+               INIT_HLIST_HEAD(&local->services);
                init_rwsem(&local->defrag_sem);
-               skb_queue_head_init(&local->accept_queue);
                skb_queue_head_init(&local->reject_queue);
                skb_queue_head_init(&local->event_queue);
                local->client_conns = RB_ROOT;
@@ -101,11 +111,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local)
        struct sock *sock;
        int ret, opt;
 
-       _enter("%p{%d}", local, local->srx.transport_type);
+       _enter("%p{%d,%d}",
+              local, local->srx.transport_type, local->srx.transport.family);
 
        /* create a socket to represent the local endpoint */
-       ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
-                              IPPROTO_UDP, &local->socket);
+       ret = sock_create_kern(&init_net, local->srx.transport.family,
+                              local->srx.transport_type, 0, &local->socket);
        if (ret < 0) {
                _leave(" = %d [socket]", ret);
                return ret;
@@ -170,18 +181,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
        long diff;
        int ret;
 
-       if (srx->transport.family == AF_INET) {
-               _enter("{%d,%u,%pI4+%hu}",
-                      srx->transport_type,
-                      srx->transport.family,
-                      &srx->transport.sin.sin_addr,
-                      ntohs(srx->transport.sin.sin_port));
-       } else {
-               _enter("{%d,%u}",
-                      srx->transport_type,
-                      srx->transport.family);
-               return ERR_PTR(-EAFNOSUPPORT);
-       }
+       _enter("{%d,%d,%pISp}",
+              srx->transport_type, srx->transport.family, &srx->transport);
 
        mutex_lock(&rxrpc_local_mutex);
 
@@ -234,13 +235,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
 found:
        mutex_unlock(&rxrpc_local_mutex);
 
-       _net("LOCAL %s %d {%d,%u,%pI4+%hu}",
-            age,
-            local->debug_id,
-            local->srx.transport_type,
-            local->srx.transport.family,
-            &local->srx.transport.sin.sin_addr,
-            ntohs(local->srx.transport.sin.sin_port));
+       _net("LOCAL %s %d {%pISp}",
+            age, local->debug_id, &local->srx.transport);
 
        _leave(" = %p", local);
        return local;
@@ -296,7 +292,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
        mutex_unlock(&rxrpc_local_mutex);
 
        ASSERT(RB_EMPTY_ROOT(&local->client_conns));
-       ASSERT(list_empty(&local->services));
+       ASSERT(hlist_empty(&local->services));
 
        if (socket) {
                local->socket = NULL;
@@ -308,7 +304,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
        /* At this point, there should be no more packets coming in to the
         * local endpoint.
         */
-       rxrpc_purge_queue(&local->accept_queue);
        rxrpc_purge_queue(&local->reject_queue);
        rxrpc_purge_queue(&local->event_queue);
 
@@ -332,11 +327,6 @@ static void rxrpc_local_processor(struct work_struct *work)
                if (atomic_read(&local->usage) == 0)
                        return rxrpc_local_destroyer(local);
 
-               if (!skb_queue_empty(&local->accept_queue)) {
-                       rxrpc_accept_incoming_calls(local);
-                       again = true;
-               }
-
                if (!skb_queue_empty(&local->reject_queue)) {
                        rxrpc_reject_packets(local);
                        again = true;
index bdc5e42..8b91078 100644 (file)
@@ -50,7 +50,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
  * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
  * packets.
  */
-unsigned int rxrpc_rx_window_size = 32;
+unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE;
+#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE
+#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE
+#endif
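/*
 * Editor's sketch, not part of the patch: the #if/#error pair above is a
 * preprocessor-time assertion that the Rx/Tx ring can hold the advertised
 * receive window.  With C11 the same guarantee can be written as a static
 * assertion next to the definitions (hypothetical values):
 */
#include <assert.h>

#define RXTX_BUFF_SIZE		64	/* must be a power of two */
#define INIT_RX_WINDOW_SIZE	32

static_assert(RXTX_BUFF_SIZE - 1 >= INIT_RX_WINDOW_SIZE,
	      "receive window must fit in the Rx/Tx ring");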
 
 /*
  * Maximum Rx MTU size.  This indicates to the sender the size of jumbo packet
@@ -64,6 +67,11 @@ unsigned int rxrpc_rx_mtu = 5692;
  */
 unsigned int rxrpc_rx_jumbo_max = 4;
 
+/*
+ * Time till packet resend (in jiffies).
+ */
+unsigned int rxrpc_resend_timeout = 4 * HZ;
+
 const char *const rxrpc_pkts[] = {
        "?00",
        "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
index f4bda06..06a9aca 100644 (file)
 #include <linux/net.h>
 #include <linux/gfp.h>
 #include <linux/skbuff.h>
-#include <linux/circ_buf.h>
 #include <linux/export.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-/*
- * Time till packet resend (in jiffies).
- */
-unsigned int rxrpc_resend_timeout = 4 * HZ;
-
-static int rxrpc_send_data(struct rxrpc_sock *rx,
-                          struct rxrpc_call *call,
-                          struct msghdr *msg, size_t len);
+struct rxrpc_pkt_buffer {
+       struct rxrpc_wire_header whdr;
+       union {
+               struct {
+                       struct rxrpc_ackpacket ack;
+                       u8 acks[255];
+                       u8 pad[3];
+               };
+               __be32 abort_code;
+       };
+       struct rxrpc_ackinfo ackinfo;
+};
 
 /*
- * extract control messages from the sendmsg() control buffer
+ * Fill out an ACK packet.
  */
-static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
-                             unsigned long *user_call_ID,
-                             enum rxrpc_command *command,
-                             u32 *abort_code,
-                             bool *_exclusive)
+static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+                                struct rxrpc_pkt_buffer *pkt)
 {
-       struct cmsghdr *cmsg;
-       bool got_user_ID = false;
-       int len;
-
-       *command = RXRPC_CMD_SEND_DATA;
-
-       if (msg->msg_controllen == 0)
-               return -EINVAL;
-
-       for_each_cmsghdr(cmsg, msg) {
-               if (!CMSG_OK(msg, cmsg))
-                       return -EINVAL;
-
-               len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
-               _debug("CMSG %d, %d, %d",
-                      cmsg->cmsg_level, cmsg->cmsg_type, len);
-
-               if (cmsg->cmsg_level != SOL_RXRPC)
-                       continue;
-
-               switch (cmsg->cmsg_type) {
-               case RXRPC_USER_CALL_ID:
-                       if (msg->msg_flags & MSG_CMSG_COMPAT) {
-                               if (len != sizeof(u32))
-                                       return -EINVAL;
-                               *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
-                       } else {
-                               if (len != sizeof(unsigned long))
-                                       return -EINVAL;
-                               *user_call_ID = *(unsigned long *)
-                                       CMSG_DATA(cmsg);
-                       }
-                       _debug("User Call ID %lx", *user_call_ID);
-                       got_user_ID = true;
-                       break;
-
-               case RXRPC_ABORT:
-                       if (*command != RXRPC_CMD_SEND_DATA)
-                               return -EINVAL;
-                       *command = RXRPC_CMD_SEND_ABORT;
-                       if (len != sizeof(*abort_code))
-                               return -EINVAL;
-                       *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
-                       _debug("Abort %x", *abort_code);
-                       if (*abort_code == 0)
-                               return -EINVAL;
-                       break;
-
-               case RXRPC_ACCEPT:
-                       if (*command != RXRPC_CMD_SEND_DATA)
-                               return -EINVAL;
-                       *command = RXRPC_CMD_ACCEPT;
-                       if (len != 0)
-                               return -EINVAL;
-                       break;
-
-               case RXRPC_EXCLUSIVE_CALL:
-                       *_exclusive = true;
-                       if (len != 0)
-                               return -EINVAL;
-                       break;
-               default:
-                       return -EINVAL;
-               }
+       rxrpc_seq_t hard_ack, top, seq;
+       int ix;
+       u32 mtu, jmax;
+       u8 *ackp = pkt->acks;
+
+       /* Barrier against rxrpc_input_data(). */
+       hard_ack = READ_ONCE(call->rx_hard_ack);
+       top = smp_load_acquire(&call->rx_top);
+
+       pkt->ack.bufferSpace    = htons(8);
+       pkt->ack.maxSkew        = htons(call->ackr_skew);
+       pkt->ack.firstPacket    = htonl(hard_ack + 1);
+       pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
+       pkt->ack.serial         = htonl(call->ackr_serial);
+       pkt->ack.reason         = call->ackr_reason;
+       pkt->ack.nAcks          = top - hard_ack;
+
+       if (after(top, hard_ack)) {
+               seq = hard_ack + 1;
+               do {
+                       ix = seq & RXRPC_RXTX_BUFF_MASK;
+                       if (call->rxtx_buffer[ix])
+                               *ackp++ = RXRPC_ACK_TYPE_ACK;
+                       else
+                               *ackp++ = RXRPC_ACK_TYPE_NACK;
+                       seq++;
+               } while (before_eq(seq, top));
        }
 
-       if (!got_user_ID)
-               return -EINVAL;
-       _leave(" = 0");
-       return 0;
+       mtu = call->conn->params.peer->if_mtu;
+       mtu -= call->conn->params.peer->hdrsize;
+       jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
+       pkt->ackinfo.rxMTU      = htonl(rxrpc_rx_mtu);
+       pkt->ackinfo.maxMTU     = htonl(mtu);
+       pkt->ackinfo.rwind      = htonl(call->rx_winsize);
+       pkt->ackinfo.jumbo_max  = htonl(jmax);
+
+       *ackp++ = 0;
+       *ackp++ = 0;
+       *ackp++ = 0;
+       return top - hard_ack + 3;
 }
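/*
 * Editor's sketch, not part of the patch: rxrpc_fill_out_ack() converts
 * the occupancy of a power-of-two ring buffer into one ACK/NACK byte per
 * sequence number.  The indexing idiom on its own, with hypothetical
 * sizes:
 */
#include <stdint.h>
#include <stddef.h>

#define RING_SIZE 64			/* must be a power of two */
#define RING_MASK (RING_SIZE - 1)

static size_t fill_acks(void *const ring[RING_SIZE], uint32_t hard_ack,
			uint32_t top, uint8_t *ackp)
{
	size_t n = 0;

	for (uint32_t seq = hard_ack + 1; (int32_t)(seq - top) <= 0; seq++) {
		/* a non-NULL slot means that sequence number was received */
		ackp[n++] = ring[seq & RING_MASK] ? 1 /* ACK */ : 2 /* NACK */;
	}
	return n;
}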
 
 /*
- * abort a call, sending an ABORT packet to the peer
+ * Send an ACK or ABORT call packet.
  */
-static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
 {
-       write_lock_bh(&call->state_lock);
-
-       if (call->state <= RXRPC_CALL_COMPLETE) {
-               call->state = RXRPC_CALL_LOCALLY_ABORTED;
-               call->local_abort = abort_code;
-               set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-               del_timer_sync(&call->resend_timer);
-               del_timer_sync(&call->ack_timer);
-               clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-               clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-               clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-               rxrpc_queue_call(call);
+       struct rxrpc_connection *conn = NULL;
+       struct rxrpc_pkt_buffer *pkt;
+       struct msghdr msg;
+       struct kvec iov[2];
+       rxrpc_serial_t serial;
+       size_t len, n;
+       int ioc, ret;
+       u32 abort_code;
+
+       _enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
+
+       spin_lock_bh(&call->lock);
+       if (call->conn)
+               conn = rxrpc_get_connection_maybe(call->conn);
+       spin_unlock_bh(&call->lock);
+       if (!conn)
+               return -ECONNRESET;
+
+       pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+       if (!pkt) {
+               rxrpc_put_connection(conn);
+               return -ENOMEM;
        }
 
-       write_unlock_bh(&call->state_lock);
-}
-
-/*
- * Create a new client call for sendmsg().
- */
-static struct rxrpc_call *
-rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
-                                 unsigned long user_call_ID, bool exclusive)
-{
-       struct rxrpc_conn_parameters cp;
-       struct rxrpc_call *call;
-       struct key *key;
-
-       DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
-
-       _enter("");
-
-       if (!msg->msg_name)
-               return ERR_PTR(-EDESTADDRREQ);
-
-       key = rx->key;
-       if (key && !rx->key->payload.data[0])
-               key = NULL;
-
-       memset(&cp, 0, sizeof(cp));
-       cp.local                = rx->local;
-       cp.key                  = rx->key;
-       cp.security_level       = rx->min_sec_level;
-       cp.exclusive            = rx->exclusive | exclusive;
-       cp.service_id           = srx->srx_service;
-       call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
-
-       _leave(" = %p\n", call);
-       return call;
-}
-
-/*
- * send a message forming part of a client call through an RxRPC socket
- * - caller holds the socket locked
- * - the socket may be either a client socket or a server socket
- */
-int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
-{
-       enum rxrpc_command cmd;
-       struct rxrpc_call *call;
-       unsigned long user_call_ID = 0;
-       bool exclusive = false;
-       u32 abort_code = 0;
-       int ret;
-
-       _enter("");
-
-       ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
-                                &exclusive);
-       if (ret < 0)
-               return ret;
+       serial = atomic_inc_return(&conn->serial);
 
-       if (cmd == RXRPC_CMD_ACCEPT) {
-               if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
-                       return -EINVAL;
-               call = rxrpc_accept_call(rx, user_call_ID);
-               if (IS_ERR(call))
-                       return PTR_ERR(call);
-               rxrpc_put_call(call);
-               return 0;
-       }
+       msg.msg_name    = &call->peer->srx.transport;
+       msg.msg_namelen = call->peer->srx.transport_len;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_flags   = 0;
+
+       pkt->whdr.epoch         = htonl(conn->proto.epoch);
+       pkt->whdr.cid           = htonl(call->cid);
+       pkt->whdr.callNumber    = htonl(call->call_id);
+       pkt->whdr.seq           = 0;
+       pkt->whdr.serial        = htonl(serial);
+       pkt->whdr.type          = type;
+       pkt->whdr.flags         = conn->out_clientflag;
+       pkt->whdr.userStatus    = 0;
+       pkt->whdr.securityIndex = call->security_ix;
+       pkt->whdr._rsvd         = 0;
+       pkt->whdr.serviceId     = htons(call->service_id);
+
+       iov[0].iov_base = pkt;
+       iov[0].iov_len  = sizeof(pkt->whdr);
+       len = sizeof(pkt->whdr);
+
+       switch (type) {
+       case RXRPC_PACKET_TYPE_ACK:
+               spin_lock_bh(&call->lock);
+               n = rxrpc_fill_out_ack(call, pkt);
+               call->ackr_reason = 0;
+
+               spin_unlock_bh(&call->lock);
+
+               _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+                      serial,
+                      ntohs(pkt->ack.maxSkew),
+                      ntohl(pkt->ack.firstPacket),
+                      ntohl(pkt->ack.previousPacket),
+                      ntohl(pkt->ack.serial),
+                      rxrpc_acks(pkt->ack.reason),
+                      pkt->ack.nAcks);
+
+               iov[0].iov_len += sizeof(pkt->ack) + n;
+               iov[1].iov_base = &pkt->ackinfo;
+               iov[1].iov_len  = sizeof(pkt->ackinfo);
+               len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
+               ioc = 2;
+               break;
 
-       call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
-       if (!call) {
-               if (cmd != RXRPC_CMD_SEND_DATA)
-                       return -EBADSLT;
-               call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
-                                                        exclusive);
-               if (IS_ERR(call))
-                       return PTR_ERR(call);
-       }
+       case RXRPC_PACKET_TYPE_ABORT:
+               abort_code = call->abort_code;
+               pkt->abort_code = htonl(abort_code);
+               _proto("Tx ABORT %%%u { %d }", serial, abort_code);
+               iov[0].iov_len += sizeof(pkt->abort_code);
+               len += sizeof(pkt->abort_code);
+               ioc = 1;
+               break;
 
-       _debug("CALL %d USR %lx ST %d on CONN %p",
-              call->debug_id, call->user_call_ID, call->state, call->conn);
-
-       if (call->state >= RXRPC_CALL_COMPLETE) {
-               /* it's too late for this call */
-               ret = -ECONNRESET;
-       } else if (cmd == RXRPC_CMD_SEND_ABORT) {
-               rxrpc_send_abort(call, abort_code);
-               ret = 0;
-       } else if (cmd != RXRPC_CMD_SEND_DATA) {
-               ret = -EINVAL;
-       } else if (!call->in_clientflag &&
-                  call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
-               /* request phase complete for this client call */
-               ret = -EPROTO;
-       } else if (call->in_clientflag &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-               /* Reply phase not begun or not complete for service call. */
-               ret = -EPROTO;
-       } else {
-               ret = rxrpc_send_data(rx, call, msg, len);
+       default:
+               BUG();
+               ret = -ENOANO;
+               goto out;
        }
 
-       rxrpc_put_call(call);
-       _leave(" = %d", ret);
-       return ret;
-}
-
-/**
- * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
- * @call: The call to send data through
- * @msg: The data to send
- * @len: The amount of data to send
- *
- * Allow a kernel service to send data on a call.  The call must be in an state
- * appropriate to sending data.  No control data should be supplied in @msg,
- * nor should an address be supplied.  MSG_MORE should be flagged if there's
- * more data to come, otherwise this data will end the transmission phase.
- */
-int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
-                          size_t len)
-{
-       int ret;
-
-       _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
-
-       ASSERTCMP(msg->msg_name, ==, NULL);
-       ASSERTCMP(msg->msg_control, ==, NULL);
-
-       lock_sock(&call->socket->sk);
-
-       _debug("CALL %d USR %lx ST %d on CONN %p",
-              call->debug_id, call->user_call_ID, call->state, call->conn);
+       ret = kernel_sendmsg(conn->params.local->socket,
+                            &msg, iov, ioc, len);
 
-       if (call->state >= RXRPC_CALL_COMPLETE) {
-               ret = -ESHUTDOWN; /* it's too late for this call */
-       } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-               ret = -EPROTO; /* request phase complete for this client call */
-       } else {
-               ret = rxrpc_send_data(call->socket, call, msg, len);
+       if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) {
+               switch (pkt->whdr.type) {
+               case RXRPC_PACKET_TYPE_ACK:
+                       rxrpc_propose_ACK(call, pkt->ack.reason,
+                                         ntohs(pkt->ack.maxSkew),
+                                         ntohl(pkt->ack.serial),
+                                         true, true);
+                       break;
+               case RXRPC_PACKET_TYPE_ABORT:
+                       break;
+               }
        }
 
-       release_sock(&call->socket->sk);
-       _leave(" = %d", ret);
+out:
+       rxrpc_put_connection(conn);
+       kfree(pkt);
        return ret;
 }
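/*
 * Editor's sketch, not part of the patch: the ACK path above hands the
 * header-plus-ack body and the trailing ackinfo to the socket as two kvec
 * segments of a single datagram.  The user-space equivalent uses
 * sendmsg(2) with an iovec pair:
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t send_two_part(int fd, const struct sockaddr *dst,
			     socklen_t dstlen, void *hdr, size_t hlen,
			     void *trailer, size_t tlen)
{
	struct iovec iov[2] = {
		{ .iov_base = hdr,     .iov_len = hlen },
		{ .iov_base = trailer, .iov_len = tlen },
	};
	struct msghdr msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name    = (void *)dst;
	msg.msg_namelen = dstlen;
	msg.msg_iov     = iov;
	msg.msg_iovlen  = 2;
	return sendmsg(fd, &msg, 0);	/* both segments leave in one datagram */
}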
 
-EXPORT_SYMBOL(rxrpc_kernel_send_data);
-
-/**
- * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
- * @call: The call to be aborted
- * @abort_code: The abort code to stick into the ABORT packet
- *
- * Allow a kernel service to abort a call, if it's still in an abortable state.
- */
-void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
-       _enter("{%d},%d", call->debug_id, abort_code);
-
-       lock_sock(&call->socket->sk);
-
-       _debug("CALL %d USR %lx ST %d on CONN %p",
-              call->debug_id, call->user_call_ID, call->state, call->conn);
-
-       if (call->state < RXRPC_CALL_COMPLETE)
-               rxrpc_send_abort(call, abort_code);
-
-       release_sock(&call->socket->sk);
-       _leave("");
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_abort_call);
-
 /*
  * send a packet through the transport endpoint
  */
@@ -367,355 +258,83 @@ send_fragmentable:
                                          (char *)&opt, sizeof(opt));
                }
                break;
-       }
-
-       up_write(&conn->params.local->defrag_sem);
-       _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
-       return ret;
-}
-
-/*
- * wait for space to appear in the transmit/ACK window
- * - caller holds the socket locked
- */
-static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
-                                   struct rxrpc_call *call,
-                                   long *timeo)
-{
-       DECLARE_WAITQUEUE(myself, current);
-       int ret;
-
-       _enter(",{%d},%ld",
-              CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
-                         call->acks_winsz),
-              *timeo);
 
-       add_wait_queue(&call->tx_waitq, &myself);
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               opt = IPV6_PMTUDISC_DONT;
+               ret = kernel_setsockopt(conn->params.local->socket,
+                                       SOL_IPV6, IPV6_MTU_DISCOVER,
+                                       (char *)&opt, sizeof(opt));
+               if (ret == 0) {
+                       ret = kernel_sendmsg(conn->params.local->socket, &msg,
+                                            iov, 1, iov[0].iov_len);
 
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               ret = 0;
-               if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
-                              call->acks_winsz) > 0)
-                       break;
-               if (signal_pending(current)) {
-                       ret = sock_intr_errno(*timeo);
-                       break;
+                       opt = IPV6_PMTUDISC_DO;
+                       kernel_setsockopt(conn->params.local->socket,
+                                         SOL_IPV6, IPV6_MTU_DISCOVER,
+                                         (char *)&opt, sizeof(opt));
                }
-
-               release_sock(&rx->sk);
-               *timeo = schedule_timeout(*timeo);
-               lock_sock(&rx->sk);
+               break;
+#endif
        }
 
-       remove_wait_queue(&call->tx_waitq, &myself);
-       set_current_state(TASK_RUNNING);
-       _leave(" = %d", ret);
+       up_write(&conn->params.local->defrag_sem);
+       _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
        return ret;
 }
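/*
 * Editor's sketch, not part of the patch: the fragmentable-send paths
 * above temporarily turn off path-MTU discovery so oversized datagrams
 * are fragmented rather than rejected, then restore it.  A user-space
 * UDP/IPv4 analogue:
 */
#include <sys/socket.h>
#include <netinet/in.h>

static ssize_t send_fragmentable(int fd, const void *buf, size_t len)
{
	int opt = IP_PMTUDISC_DONT;
	ssize_t ret;

	if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
		       &opt, sizeof(opt)) < 0)
		return -1;
	ret = send(fd, buf, len, 0);	/* may now be fragmented en route */

	opt = IP_PMTUDISC_DO;		/* restore DF-based discovery */
	setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &opt, sizeof(opt));
	return ret;
}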
 
 /*
- * attempt to schedule an instant Tx resend
- */
-static inline void rxrpc_instant_resend(struct rxrpc_call *call)
-{
-       read_lock_bh(&call->state_lock);
-       if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
-               clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-               if (call->state < RXRPC_CALL_COMPLETE &&
-                   !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-                       rxrpc_queue_call(call);
-       }
-       read_unlock_bh(&call->state_lock);
-}
-
-/*
- * queue a packet for transmission, set the resend timer and attempt
- * to send the packet immediately
- */
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
-                              bool last)
-{
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       int ret;
-
-       _net("queue skb %p [%d]", skb, call->acks_head);
-
-       ASSERT(call->acks_window != NULL);
-       call->acks_window[call->acks_head] = (unsigned long) skb;
-       smp_wmb();
-       call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
-
-       if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
-               _debug("________awaiting reply/ACK__________");
-               write_lock_bh(&call->state_lock);
-               switch (call->state) {
-               case RXRPC_CALL_CLIENT_SEND_REQUEST:
-                       call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
-                       break;
-               case RXRPC_CALL_SERVER_ACK_REQUEST:
-                       call->state = RXRPC_CALL_SERVER_SEND_REPLY;
-                       if (!last)
-                               break;
-               case RXRPC_CALL_SERVER_SEND_REPLY:
-                       call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
-                       break;
-               default:
-                       break;
-               }
-               write_unlock_bh(&call->state_lock);
-       }
-
-       _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-       sp->need_resend = false;
-       sp->resend_at = jiffies + rxrpc_resend_timeout;
-       if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
-               _debug("run timer");
-               call->resend_timer.expires = sp->resend_at;
-               add_timer(&call->resend_timer);
-       }
-
-       /* attempt to cancel the rx-ACK timer, deferring reply transmission if
-        * we're ACK'ing the request phase of an incoming call */
-       ret = -EAGAIN;
-       if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
-               /* the packet may be freed by rxrpc_process_call() before this
-                * returns */
-               ret = rxrpc_send_data_packet(call->conn, skb);
-               _net("sent skb %p", skb);
-       } else {
-               _debug("failed to delete ACK timer");
-       }
-
-       if (ret < 0) {
-               _debug("need instant resend %d", ret);
-               sp->need_resend = true;
-               rxrpc_instant_resend(call);
-       }
-
-       _leave("");
-}
-
-/*
- * Convert a host-endian header into a network-endian header.
- */
-static void rxrpc_insert_header(struct sk_buff *skb)
-{
-       struct rxrpc_wire_header whdr;
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-       whdr.epoch      = htonl(sp->hdr.epoch);
-       whdr.cid        = htonl(sp->hdr.cid);
-       whdr.callNumber = htonl(sp->hdr.callNumber);
-       whdr.seq        = htonl(sp->hdr.seq);
-       whdr.serial     = htonl(sp->hdr.serial);
-       whdr.type       = sp->hdr.type;
-       whdr.flags      = sp->hdr.flags;
-       whdr.userStatus = sp->hdr.userStatus;
-       whdr.securityIndex = sp->hdr.securityIndex;
-       whdr._rsvd      = htons(sp->hdr._rsvd);
-       whdr.serviceId  = htons(sp->hdr.serviceId);
-
-       memcpy(skb->head, &whdr, sizeof(whdr));
-}
-
-/*
- * send data through a socket
- * - must be called in process context
- * - caller holds the socket locked
+ * reject packets through the local endpoint
  */
-static int rxrpc_send_data(struct rxrpc_sock *rx,
-                          struct rxrpc_call *call,
-                          struct msghdr *msg, size_t len)
+void rxrpc_reject_packets(struct rxrpc_local *local)
 {
+       struct sockaddr_rxrpc srx;
        struct rxrpc_skb_priv *sp;
+       struct rxrpc_wire_header whdr;
        struct sk_buff *skb;
-       struct sock *sk = &rx->sk;
-       long timeo;
-       bool more;
-       int ret, copied;
-
-       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
-       /* this should be in poll */
-       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-       if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-               return -EPIPE;
-
-       more = msg->msg_flags & MSG_MORE;
-
-       skb = call->tx_pending;
-       call->tx_pending = NULL;
-
-       copied = 0;
-       do {
-               if (!skb) {
-                       size_t size, chunk, max, space;
-
-                       _debug("alloc");
-
-                       if (CIRC_SPACE(call->acks_head,
-                                      ACCESS_ONCE(call->acks_tail),
-                                      call->acks_winsz) <= 0) {
-                               ret = -EAGAIN;
-                               if (msg->msg_flags & MSG_DONTWAIT)
-                                       goto maybe_error;
-                               ret = rxrpc_wait_for_tx_window(rx, call,
-                                                              &timeo);
-                               if (ret < 0)
-                                       goto maybe_error;
-                       }
-
-                       max = call->conn->params.peer->maxdata;
-                       max -= call->conn->security_size;
-                       max &= ~(call->conn->size_align - 1UL);
-
-                       chunk = max;
-                       if (chunk > msg_data_left(msg) && !more)
-                               chunk = msg_data_left(msg);
-
-                       space = chunk + call->conn->size_align;
-                       space &= ~(call->conn->size_align - 1UL);
-
-                       size = space + call->conn->header_size;
-
-                       _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
-
-                       /* create a buffer that we can retain until it's ACK'd */
-                       skb = sock_alloc_send_skb(
-                               sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
-                       if (!skb)
-                               goto maybe_error;
-
-                       rxrpc_new_skb(skb);
-
-                       _debug("ALLOC SEND %p", skb);
-
-                       ASSERTCMP(skb->mark, ==, 0);
+       struct msghdr msg;
+       struct kvec iov[2];
+       size_t size;
+       __be32 code;
 
-                       _debug("HS: %u", call->conn->header_size);
-                       skb_reserve(skb, call->conn->header_size);
-                       skb->len += call->conn->header_size;
+       _enter("%d", local->debug_id);
 
-                       sp = rxrpc_skb(skb);
-                       sp->remain = chunk;
-                       if (sp->remain > skb_tailroom(skb))
-                               sp->remain = skb_tailroom(skb);
+       iov[0].iov_base = &whdr;
+       iov[0].iov_len = sizeof(whdr);
+       iov[1].iov_base = &code;
+       iov[1].iov_len = sizeof(code);
+       size = sizeof(whdr) + sizeof(code);
 
-                       _net("skb: hr %d, tr %d, hl %d, rm %d",
-                              skb_headroom(skb),
-                              skb_tailroom(skb),
-                              skb_headlen(skb),
-                              sp->remain);
+       msg.msg_name = &srx.transport;
+       msg.msg_control = NULL;
+       msg.msg_controllen = 0;
+       msg.msg_flags = 0;
 
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-               }
+       memset(&whdr, 0, sizeof(whdr));
+       whdr.type = RXRPC_PACKET_TYPE_ABORT;
 
-               _debug("append");
+       while ((skb = skb_dequeue(&local->reject_queue))) {
+               rxrpc_see_skb(skb);
                sp = rxrpc_skb(skb);
 
-               /* append next segment of data to the current buffer */
-               if (msg_data_left(msg) > 0) {
-                       int copy = skb_tailroom(skb);
-                       ASSERTCMP(copy, >, 0);
-                       if (copy > msg_data_left(msg))
-                               copy = msg_data_left(msg);
-                       if (copy > sp->remain)
-                               copy = sp->remain;
-
-                       _debug("add");
-                       ret = skb_add_data(skb, &msg->msg_iter, copy);
-                       _debug("added");
-                       if (ret < 0)
-                               goto efault;
-                       sp->remain -= copy;
-                       skb->mark += copy;
-                       copied += copy;
-               }
+               if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+                       msg.msg_namelen = srx.transport_len;
 
-               /* check for the far side aborting the call or a network error
-                * occurring */
-               if (call->state > RXRPC_CALL_COMPLETE)
-                       goto call_aborted;
-
-               /* add the packet to the send queue if it's now full */
-               if (sp->remain <= 0 ||
-                   (msg_data_left(msg) == 0 && !more)) {
-                       struct rxrpc_connection *conn = call->conn;
-                       uint32_t seq;
-                       size_t pad;
-
-                       /* pad out if we're using security */
-                       if (conn->security_ix) {
-                               pad = conn->security_size + skb->mark;
-                               pad = conn->size_align - pad;
-                               pad &= conn->size_align - 1;
-                               _debug("pad %zu", pad);
-                               if (pad)
-                                       memset(skb_put(skb, pad), 0, pad);
-                       }
-
-                       seq = atomic_inc_return(&call->sequence);
-
-                       sp->hdr.epoch   = conn->proto.epoch;
-                       sp->hdr.cid     = call->cid;
-                       sp->hdr.callNumber = call->call_id;
-                       sp->hdr.seq     = seq;
-                       sp->hdr.serial  = atomic_inc_return(&conn->serial);
-                       sp->hdr.type    = RXRPC_PACKET_TYPE_DATA;
-                       sp->hdr.userStatus = 0;
-                       sp->hdr.securityIndex = conn->security_ix;
-                       sp->hdr._rsvd   = 0;
-                       sp->hdr.serviceId = call->service_id;
-
-                       sp->hdr.flags = conn->out_clientflag;
-                       if (msg_data_left(msg) == 0 && !more)
-                               sp->hdr.flags |= RXRPC_LAST_PACKET;
-                       else if (CIRC_SPACE(call->acks_head,
-                                           ACCESS_ONCE(call->acks_tail),
-                                           call->acks_winsz) > 1)
-                               sp->hdr.flags |= RXRPC_MORE_PACKETS;
-                       if (more && seq & 1)
-                               sp->hdr.flags |= RXRPC_REQUEST_ACK;
-
-                       ret = conn->security->secure_packet(
-                               call, skb, skb->mark,
-                               skb->head + sizeof(struct rxrpc_wire_header));
-                       if (ret < 0)
-                               goto out;
-
-                       rxrpc_insert_header(skb);
-                       rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
-                       skb = NULL;
-               }
-       } while (msg_data_left(msg) > 0);
+                       code = htonl(skb->priority);
 
-success:
-       ret = copied;
-out:
-       call->tx_pending = skb;
-       _leave(" = %d", ret);
-       return ret;
+                       whdr.epoch      = htonl(sp->hdr.epoch);
+                       whdr.cid        = htonl(sp->hdr.cid);
+                       whdr.callNumber = htonl(sp->hdr.callNumber);
+                       whdr.serviceId  = htons(sp->hdr.serviceId);
+                       whdr.flags      = sp->hdr.flags;
+                       whdr.flags      ^= RXRPC_CLIENT_INITIATED;
+                       whdr.flags      &= RXRPC_CLIENT_INITIATED;
 
-call_aborted:
-       rxrpc_free_skb(skb);
-       if (call->state == RXRPC_CALL_NETWORK_ERROR)
-               ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ?
-                       call->error_report :
-                       call->error_report - RXRPC_LOCAL_ERROR_OFFSET;
-       else
-               ret = -ECONNABORTED;
-       _leave(" = %d", ret);
-       return ret;
+                       kernel_sendmsg(local->socket, &msg, iov, 2, size);
+               }
 
-maybe_error:
-       if (copied)
-               goto success;
-       goto out;
+               rxrpc_free_skb(skb);
+       }
 
-efault:
-       ret = -EFAULT;
-       goto out;
+       _leave("");
 }
index 8940674..9e0725f 100644
@@ -66,6 +66,32 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
                }
                break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               srx.transport.sin6.sin6_port = serr->port;
+               srx.transport_len = sizeof(struct sockaddr_in6);
+               switch (serr->ee.ee_origin) {
+               case SO_EE_ORIGIN_ICMP6:
+                       _net("Rx ICMP6");
+                       memcpy(&srx.transport.sin6.sin6_addr,
+                              skb_network_header(skb) + serr->addr_offset,
+                              sizeof(struct in6_addr));
+                       break;
+               case SO_EE_ORIGIN_ICMP:
+                       _net("Rx ICMP on v6 sock");
+                       memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+                              skb_network_header(skb) + serr->addr_offset,
+                              sizeof(struct in_addr));
+                       break;
+               default:
+                       memcpy(&srx.transport.sin6.sin6_addr,
+                              &ipv6_hdr(skb)->saddr,
+                              sizeof(struct in6_addr));
+                       break;
+               }
+               break;
+#endif
+
        default:
                BUG();
        }
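The SO_EE_ORIGIN_ICMP case above deals with an IPv4 ICMP error arriving on a v6 socket: only the four IPv4 address bytes are copied, landing in the tail of the in6_addr where a v4-mapped address (::ffff:a.b.c.d) keeps them. A standalone sketch of that layout, not from this patch:

#include <netinet/in.h>
#include <string.h>

/* Place an IPv4 address in the last four bytes of an in6_addr - the
 * position it holds in a v4-mapped address such as ::ffff:192.0.2.1.
 * Whatever should occupy the first twelve bytes is assumed to be
 * filled in elsewhere.
 */
static void v4_into_mapped_tail(struct in6_addr *v6, const struct in_addr *v4)
{
	memcpy(v6->s6_addr + 12, v4, sizeof(*v4));
}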
@@ -129,15 +155,14 @@ void rxrpc_error_report(struct sock *sk)
                _leave("UDP socket errqueue empty");
                return;
        }
+       rxrpc_new_skb(skb);
        serr = SKB_EXT_ERR(skb);
        if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
                _leave("UDP empty message");
-               kfree_skb(skb);
+               rxrpc_free_skb(skb);
                return;
        }
 
-       rxrpc_new_skb(skb);
-
        rcu_read_lock();
        peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
        if (peer && !rxrpc_get_peer_maybe(peer))
@@ -248,13 +273,20 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
        struct rxrpc_peer *peer =
                container_of(work, struct rxrpc_peer, error_distributor);
        struct rxrpc_call *call;
-       int error_report;
+       enum rxrpc_call_completion compl;
+       int error;
 
        _enter("");
 
-       error_report = READ_ONCE(peer->error_report);
+       error = READ_ONCE(peer->error_report);
+       if (error < RXRPC_LOCAL_ERROR_OFFSET) {
+               compl = RXRPC_CALL_NETWORK_ERROR;
+       } else {
+               compl = RXRPC_CALL_LOCAL_ERROR;
+               error -= RXRPC_LOCAL_ERROR_OFFSET;
+       }
 
-       _debug("ISSUE ERROR %d", error_report);
+       _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error);
 
        spin_lock_bh(&peer->lock);
 
@@ -262,16 +294,10 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
                call = hlist_entry(peer->error_targets.first,
                                   struct rxrpc_call, error_link);
                hlist_del_init(&call->error_link);
+               rxrpc_see_call(call);
 
-               write_lock(&call->state_lock);
-               if (call->state != RXRPC_CALL_COMPLETE &&
-                   call->state < RXRPC_CALL_NETWORK_ERROR) {
-                       call->error_report = error_report;
-                       call->state = RXRPC_CALL_NETWORK_ERROR;
-                       set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
-                       rxrpc_queue_call(call);
-               }
-               write_unlock(&call->state_lock);
+               if (rxrpc_set_call_completion(call, compl, 0, error))
+                       rxrpc_notify_socket(call);
        }
 
        spin_unlock_bh(&peer->lock);
index 538e983..f3e5766 100644
 #include <linux/skbuff.h>
 #include <linux/udp.h>
 #include <linux/in.h>
+#include <linux/in6.h>
 #include <linux/slab.h>
 #include <linux/hashtable.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <net/ip6_route.h>
 #include "ar-internal.h"
 
 static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
@@ -50,6 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
                size = sizeof(srx->transport.sin.sin_addr);
                p = (u16 *)&srx->transport.sin.sin_addr;
                break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               hash_key += (u16 __force)srx->transport.sin.sin_port;
+               size = sizeof(srx->transport.sin6.sin6_addr);
+               p = (u16 *)&srx->transport.sin6.sin6_addr;
+               break;
+#endif
        default:
                WARN(1, "AF_RXRPC: Unsupported transport address family\n");
                return 0;
@@ -93,6 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
                        memcmp(&peer->srx.transport.sin.sin_addr,
                               &srx->transport.sin.sin_addr,
                               sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               return ((u16 __force)peer->srx.transport.sin6.sin6_port -
+                       (u16 __force)srx->transport.sin6.sin6_port) ?:
+                       memcmp(&peer->srx.transport.sin6.sin6_addr,
+                              &srx->transport.sin6.sin6_addr,
+                              sizeof(struct in6_addr));
+#endif
        default:
                BUG();
        }
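rxrpc_peer_cmp_key() above leans on the GNU "a ?: b" extension, which evaluates to a when a is non-zero and to b otherwise: the port difference decides the comparison outright, and memcmp() on the addresses only breaks ties. A portable sketch of the same logic:

#include <netinet/in.h>
#include <string.h>

static long cmp_sockaddr_in6(const struct sockaddr_in6 *a,
			     const struct sockaddr_in6 *b)
{
	/* A non-zero port difference settles it... */
	long diff = (unsigned short)a->sin6_port - (unsigned short)b->sin6_port;

	if (diff != 0)
		return diff;
	/* ...otherwise compare the 128-bit addresses. */
	return memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(a->sin6_addr));
}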
@@ -130,17 +147,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
 
        peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
        if (peer) {
-               switch (srx->transport.family) {
-               case AF_INET:
-                       _net("PEER %d {%d,%u,%pI4+%hu}",
-                            peer->debug_id,
-                            peer->srx.transport_type,
-                            peer->srx.transport.family,
-                            &peer->srx.transport.sin.sin_addr,
-                            ntohs(peer->srx.transport.sin.sin_port));
-                       break;
-               }
-
+               _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
                _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
        }
        return peer;
@@ -152,22 +159,53 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
  */
 static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 {
+       struct dst_entry *dst;
        struct rtable *rt;
-       struct flowi4 fl4;
+       struct flowi fl;
+       struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
+       struct flowi6 *fl6 = &fl.u.ip6;
+#endif
 
        peer->if_mtu = 1500;
 
-       rt = ip_route_output_ports(&init_net, &fl4, NULL,
-                                  peer->srx.transport.sin.sin_addr.s_addr, 0,
-                                  htons(7000), htons(7001),
-                                  IPPROTO_UDP, 0, 0);
-       if (IS_ERR(rt)) {
-               _leave(" [route err %ld]", PTR_ERR(rt));
-               return;
+       memset(&fl, 0, sizeof(fl));
+       switch (peer->srx.transport.family) {
+       case AF_INET:
+               rt = ip_route_output_ports(
+                       &init_net, fl4, NULL,
+                       peer->srx.transport.sin.sin_addr.s_addr, 0,
+                       htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
+               if (IS_ERR(rt)) {
+                       _leave(" [route err %ld]", PTR_ERR(rt));
+                       return;
+               }
+               dst = &rt->dst;
+               break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               fl6->flowi6_iif = LOOPBACK_IFINDEX;
+               fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
+               fl6->flowi6_proto = IPPROTO_UDP;
+               memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr,
+                      sizeof(struct in6_addr));
+               fl6->fl6_dport = htons(7001);
+               fl6->fl6_sport = htons(7000);
+               dst = ip6_route_output(&init_net, NULL, fl6);
+               if (IS_ERR(dst)) {
+                       _leave(" [route err %ld]", PTR_ERR(dst));
+                       return;
+               }
+               break;
+#endif
+
+       default:
+               BUG();
        }
 
-       peer->if_mtu = dst_mtu(&rt->dst);
-       dst_release(&rt->dst);
+       peer->if_mtu = dst_mtu(dst);
+       dst_release(dst);
 
        _leave(" [if_mtu %u]", peer->if_mtu);
 }
@@ -198,6 +236,40 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
        return peer;
 }
 
+/*
+ * Initialise peer record.
+ */
+static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
+{
+       peer->hash_key = hash_key;
+       rxrpc_assess_MTU_size(peer);
+       peer->mtu = peer->if_mtu;
+
+       switch (peer->srx.transport.family) {
+       case AF_INET:
+               peer->hdrsize = sizeof(struct iphdr);
+               break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+       case AF_INET6:
+               peer->hdrsize = sizeof(struct ipv6hdr);
+               break;
+#endif
+       default:
+               BUG();
+       }
+
+       switch (peer->srx.transport_type) {
+       case SOCK_DGRAM:
+               peer->hdrsize += sizeof(struct udphdr);
+               break;
+       default:
+               BUG();
+       }
+
+       peer->hdrsize += sizeof(struct rxrpc_wire_header);
+       peer->maxdata = peer->mtu - peer->hdrsize;
+}
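As a worked example of the sizing above, assuming an Ethernet-sized interface MTU and IPv4 with no options (header sizes as in the mainline headers):

/* if_mtu = 1500, IPv4/UDP:
 *
 *	hdrsize = sizeof(struct iphdr)            20
 *	        + sizeof(struct udphdr)            8
 *	        + sizeof(struct rxrpc_wire_header) 28
 *	        = 56
 *
 *	maxdata = 1500 - 56 = 1444 bytes of payload per packet
 */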
+
 /*
  * Set up a new peer.
  */
@@ -212,31 +284,40 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
 
        peer = rxrpc_alloc_peer(local, gfp);
        if (peer) {
-               peer->hash_key = hash_key;
                memcpy(&peer->srx, srx, sizeof(*srx));
+               rxrpc_init_peer(peer, hash_key);
+       }
 
-               rxrpc_assess_MTU_size(peer);
-               peer->mtu = peer->if_mtu;
-
-               if (srx->transport.family == AF_INET) {
-                       peer->hdrsize = sizeof(struct iphdr);
-                       switch (srx->transport_type) {
-                       case SOCK_DGRAM:
-                               peer->hdrsize += sizeof(struct udphdr);
-                               break;
-                       default:
-                               BUG();
-                               break;
-                       }
-               } else {
-                       BUG();
-               }
+       _leave(" = %p", peer);
+       return peer;
+}
+
+/*
+ * Set up a new incoming peer.  The address is prestored in the preallocated
+ * peer.
+ */
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
+                                             struct rxrpc_peer *prealloc)
+{
+       struct rxrpc_peer *peer;
+       unsigned long hash_key;
+
+       hash_key = rxrpc_peer_hash_key(local, &prealloc->srx);
+       prealloc->local = local;
+       rxrpc_init_peer(prealloc, hash_key);
 
-               peer->hdrsize += sizeof(struct rxrpc_wire_header);
-               peer->maxdata = peer->mtu - peer->hdrsize;
+       spin_lock(&rxrpc_peer_hash_lock);
+
+       /* Need to check that we aren't racing with someone else */
+       peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key);
+       if (peer && !rxrpc_get_peer_maybe(peer))
+               peer = NULL;
+       if (!peer) {
+               peer = prealloc;
+               hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key);
        }
 
-       _leave(" = %p", peer);
+       spin_unlock(&rxrpc_peer_hash_lock);
        return peer;
 }
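This is the usual preallocate-then-publish pattern: the peer was allocated where blocking was permissible, and under the hash lock we either adopt a racing winner (taking a ref on it) or publish the prealloc. A caller-side sketch, with hypothetical surroundings:

peer = rxrpc_lookup_incoming_peer(local, prealloc);
if (peer != prealloc) {
	/* Lost the race: a matching peer already existed and a ref was
	 * taken on it; the prealloc stays unused and can be kept for
	 * the next incoming call.
	 */
}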
 
@@ -249,11 +330,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
        struct rxrpc_peer *peer, *candidate;
        unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
 
-       _enter("{%d,%d,%pI4+%hu}",
-              srx->transport_type,
-              srx->transport_len,
-              &srx->transport.sin.sin_addr,
-              ntohs(srx->transport.sin.sin_port));
+       _enter("{%pISp}", &srx->transport);
 
        /* search the peer list first */
        rcu_read_lock();
@@ -272,7 +349,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
                        return NULL;
                }
 
-               spin_lock(&rxrpc_peer_hash_lock);
+               spin_lock_bh(&rxrpc_peer_hash_lock);
 
                /* Need to check that we aren't racing with someone else */
                peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
@@ -282,7 +359,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
                        hash_add_rcu(rxrpc_peer_hash,
                                     &candidate->hash_link, hash_key);
 
-               spin_unlock(&rxrpc_peer_hash_lock);
+               spin_unlock_bh(&rxrpc_peer_hash_lock);
 
                if (peer)
                        kfree(candidate);
@@ -290,11 +367,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
                        peer = candidate;
        }
 
-       _net("PEER %d {%d,%pI4+%hu}",
-            peer->debug_id,
-            peer->srx.transport_type,
-            &peer->srx.transport.sin.sin_addr,
-            ntohs(peer->srx.transport.sin.sin_port));
+       _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
 
        _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
        return peer;
@@ -307,9 +380,24 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer)
 {
        ASSERT(hlist_empty(&peer->error_targets));
 
-       spin_lock(&rxrpc_peer_hash_lock);
+       spin_lock_bh(&rxrpc_peer_hash_lock);
        hash_del_rcu(&peer->hash_link);
-       spin_unlock(&rxrpc_peer_hash_lock);
+       spin_unlock_bh(&rxrpc_peer_hash_lock);
 
        kfree_rcu(peer, rcu);
 }
+
+/**
+ * rxrpc_kernel_get_peer - Get the peer address of a call
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ * @_srx: Where to place the result
+ *
+ * Get the address of the remote peer in a call.
+ */
+void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+                          struct sockaddr_rxrpc *_srx)
+{
+       *_srx = call->peer->srx;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
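A hypothetical kernel-service caller of the new export, purely as a sketch:

static void log_call_peer(struct socket *sock, struct rxrpc_call *call)
{
	struct sockaddr_rxrpc srx;

	rxrpc_kernel_get_peer(sock, call, &srx);
	pr_info("call peer: %pISp\n", &srx.transport);
}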
index ced5f07..65cd980 100644
 static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
        [RXRPC_CONN_UNUSED]                     = "Unused  ",
        [RXRPC_CONN_CLIENT]                     = "Client  ",
+       [RXRPC_CONN_SERVICE_PREALLOC]           = "SvPrealc",
        [RXRPC_CONN_SERVICE_UNSECURED]          = "SvUnsec ",
        [RXRPC_CONN_SERVICE_CHALLENGING]        = "SvChall ",
        [RXRPC_CONN_SERVICE]                    = "SvSecure",
        [RXRPC_CONN_REMOTELY_ABORTED]           = "RmtAbort",
        [RXRPC_CONN_LOCALLY_ABORTED]            = "LocAbort",
-       [RXRPC_CONN_NETWORK_ERROR]              = "NetError",
 };
 
 /*
@@ -30,6 +30,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
  */
 static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
 {
+       rcu_read_lock();
        read_lock(&rxrpc_call_lock);
        return seq_list_start_head(&rxrpc_calls, *_pos);
 }
@@ -42,17 +43,21 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
 {
        read_unlock(&rxrpc_call_lock);
+       rcu_read_unlock();
 }
 
 static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 {
-       struct rxrpc_connection *conn;
+       struct rxrpc_local *local;
+       struct rxrpc_sock *rx;
+       struct rxrpc_peer *peer;
        struct rxrpc_call *call;
-       char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+       char lbuff[50], rbuff[50];
 
        if (v == &rxrpc_calls) {
                seq_puts(seq,
-                        "Proto Local                  Remote                "
+                        "Proto Local                                          "
+                        " Remote                                         "
                         " SvID ConnID   CallID   End Use State    Abort   "
                         " UserID\n");
                return 0;
@@ -60,30 +65,35 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 
        call = list_entry(v, struct rxrpc_call, link);
 
-       sprintf(lbuff, "%pI4:%u",
-               &call->local->srx.transport.sin.sin_addr,
-               ntohs(call->local->srx.transport.sin.sin_port));
+       rx = rcu_dereference(call->socket);
+       if (rx) {
+               local = READ_ONCE(rx->local);
+               if (local)
+                       sprintf(lbuff, "%pISpc", &local->srx.transport);
+               else
+                       strcpy(lbuff, "no_local");
+       } else {
+               strcpy(lbuff, "no_socket");
+       }
 
-       conn = call->conn;
-       if (conn)
-               sprintf(rbuff, "%pI4:%u",
-                       &conn->params.peer->srx.transport.sin.sin_addr,
-                       ntohs(conn->params.peer->srx.transport.sin.sin_port));
+       peer = call->peer;
+       if (peer)
+               sprintf(rbuff, "%pISpc", &peer->srx.transport);
        else
                strcpy(rbuff, "no_connection");
 
        seq_printf(seq,
-                  "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+                  "UDP   %-47.47s %-47.47s %4x %08x %08x %s %3u"
                   " %-8.8s %08x %lx\n",
                   lbuff,
                   rbuff,
                   call->service_id,
                   call->cid,
                   call->call_id,
-                  call->in_clientflag ? "Svc" : "Clt",
+                  rxrpc_is_service_call(call) ? "Svc" : "Clt",
                   atomic_read(&call->usage),
                   rxrpc_call_states[call->state],
-                  call->remote_abort ?: call->local_abort,
+                  call->abort_code,
                   call->user_call_ID);
 
        return 0;
@@ -115,13 +125,13 @@ const struct file_operations rxrpc_call_seq_fops = {
 static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
 {
        read_lock(&rxrpc_connection_lock);
-       return seq_list_start_head(&rxrpc_connections, *_pos);
+       return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
 }
 
 static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
                                       loff_t *pos)
 {
-       return seq_list_next(v, &rxrpc_connections, pos);
+       return seq_list_next(v, &rxrpc_connection_proc_list, pos);
 }
 
 static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
@@ -132,29 +142,31 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
 static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 {
        struct rxrpc_connection *conn;
-       char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+       char lbuff[50], rbuff[50];
 
-       if (v == &rxrpc_connections) {
+       if (v == &rxrpc_connection_proc_list) {
                seq_puts(seq,
-                        "Proto Local                  Remote                "
+                        "Proto Local                                          "
+                        " Remote                                         "
                         " SvID ConnID   End Use State    Key     "
                         " Serial   ISerial\n"
                         );
                return 0;
        }
 
-       conn = list_entry(v, struct rxrpc_connection, link);
-
-       sprintf(lbuff, "%pI4:%u",
-               &conn->params.local->srx.transport.sin.sin_addr,
-               ntohs(conn->params.local->srx.transport.sin.sin_port));
+       conn = list_entry(v, struct rxrpc_connection, proc_link);
+       if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) {
+               strcpy(lbuff, "no_local");
+               strcpy(rbuff, "no_connection");
+               goto print;
+       }
 
-       sprintf(rbuff, "%pI4:%u",
-               &conn->params.peer->srx.transport.sin.sin_addr,
-               ntohs(conn->params.peer->srx.transport.sin.sin_port));
+       sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
 
+       sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
+print:
        seq_printf(seq,
-                  "UDP   %-22.22s %-22.22s %4x %08x %s %3u"
+                  "UDP   %-47.47s %-47.47s %4x %08x %s %3u"
                   " %s %08x %08x %08x\n",
                   lbuff,
                   rbuff,
@@ -165,7 +177,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
                   rxrpc_conn_states[conn->state],
                   key_serial(conn->params.key),
                   atomic_read(&conn->serial),
-                  atomic_read(&conn->hi_serial));
+                  conn->hi_serial);
 
        return 0;
 }
index 9ed66d5..a284205 100644
 #include "ar-internal.h"
 
 /*
- * removal a call's user ID from the socket tree to make the user ID available
- * again and so that it won't be seen again in association with that call
+ * Post a call for attention by the socket or kernel service.  Further
+ * notifications are suppressed by putting recvmsg_link on a dummy queue.
  */
-void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+void rxrpc_notify_socket(struct rxrpc_call *call)
 {
-       _debug("RELEASE CALL %d", call->debug_id);
+       struct rxrpc_sock *rx;
+       struct sock *sk;
+
+       _enter("%d", call->debug_id);
+
+       if (!list_empty(&call->recvmsg_link))
+               return;
+
+       rcu_read_lock();
+
+       rx = rcu_dereference(call->socket);
+       sk = &rx->sk;
+       if (rx && sk->sk_state < RXRPC_CLOSE) {
+               if (call->notify_rx) {
+                       call->notify_rx(sk, call, call->user_call_ID);
+               } else {
+                       write_lock_bh(&rx->recvmsg_lock);
+                       if (list_empty(&call->recvmsg_link)) {
+                               rxrpc_get_call(call, rxrpc_call_got);
+                               list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
+                       }
+                       write_unlock_bh(&rx->recvmsg_lock);
 
-       if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
-               write_lock_bh(&rx->call_lock);
-               rb_erase(&call->sock_node, &call->socket->calls);
-               clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
-               write_unlock_bh(&rx->call_lock);
+                       if (!sock_flag(sk, SOCK_DEAD)) {
+                               _debug("call %ps", sk->sk_data_ready);
+                               sk->sk_data_ready(sk);
+                       }
+               }
        }
 
-       read_lock_bh(&call->state_lock);
-       if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-           !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-               rxrpc_queue_call(call);
-       read_unlock_bh(&call->state_lock);
+       rcu_read_unlock();
+       _leave("");
 }
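For reference, the notify_rx hook invoked above is assumed to have the following shape (a sketch; the authoritative typedef lives in the rxrpc headers):

typedef void (*rxrpc_notify_rx_t)(struct sock *sk, struct rxrpc_call *call,
				  unsigned long user_call_ID);

A kernel service that installs such a hook is notified directly; only hook-less (userspace) sockets get the call queued on recvmsg_q and sk_data_ready() raised.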
 
 /*
- * receive a message from an RxRPC socket
- * - we need to be careful about two or more threads calling recvmsg
- *   simultaneously
+ * Pass a call terminating message to userspace.
  */
-int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
-                 int flags)
+static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
 {
-       struct rxrpc_skb_priv *sp;
-       struct rxrpc_call *call = NULL, *continue_call = NULL;
-       struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
-       struct sk_buff *skb;
-       long timeo;
-       int copy, ret, ullen, offset, copied = 0;
-       u32 abort_code;
+       u32 tmp = 0;
+       int ret;
+
+       switch (call->completion) {
+       case RXRPC_CALL_SUCCEEDED:
+               ret = 0;
+               if (rxrpc_is_service_call(call))
+                       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp);
+               break;
+       case RXRPC_CALL_REMOTELY_ABORTED:
+               tmp = call->abort_code;
+               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+               break;
+       case RXRPC_CALL_LOCALLY_ABORTED:
+               tmp = call->abort_code;
+               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+               break;
+       case RXRPC_CALL_NETWORK_ERROR:
+               tmp = call->error;
+               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
+               break;
+       case RXRPC_CALL_LOCAL_ERROR:
+               tmp = call->error;
+               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
+               break;
+       default:
+               pr_err("Invalid terminal call state %u\n", call->state);
+               BUG();
+               break;
+       }
 
-       DEFINE_WAIT(wait);
+       return ret;
+}
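On the user side these indications arrive as SOL_RXRPC control messages. A hypothetical userspace consumer, sketched assuming the usual cmsg macros and the RXRPC_* constants from the uapi header:

struct cmsghdr *cmsg;
unsigned int code;

for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
	if (cmsg->cmsg_level != SOL_RXRPC)
		continue;
	if (cmsg->cmsg_type == RXRPC_ABORT) {
		memcpy(&code, CMSG_DATA(cmsg), sizeof(code));
		fprintf(stderr, "call aborted: %u\n", code);
	}
}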
 
-       _enter(",,,%zu,%d", len, flags);
+/*
+ * Pass back notification of a new call.  The call is added to the
+ * to-be-accepted list.  This means that the next call to be accepted might not
+ * be the last call seen awaiting acceptance, but unless we leave this on the
+ * front of the queue and block all other messages until someone gives us a
+ * user_ID for it, there's not a lot we can do.
+ */
+static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx,
+                                 struct rxrpc_call *call,
+                                 struct msghdr *msg, int flags)
+{
+       int tmp = 0, ret;
 
-       if (flags & (MSG_OOB | MSG_TRUNC))
-               return -EOPNOTSUPP;
+       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp);
 
-       ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+       if (ret == 0 && !(flags & MSG_PEEK)) {
+               _debug("to be accepted");
+               write_lock_bh(&rx->recvmsg_lock);
+               list_del_init(&call->recvmsg_link);
+               write_unlock_bh(&rx->recvmsg_lock);
 
-       timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
-       msg->msg_flags |= MSG_MORE;
+               rxrpc_get_call(call, rxrpc_call_got);
+               write_lock(&rx->call_lock);
+               list_add_tail(&call->accept_link, &rx->to_be_accepted);
+               write_unlock(&rx->call_lock);
+       }
 
-       lock_sock(&rx->sk);
+       return ret;
+}
 
-       for (;;) {
-               /* return immediately if a client socket has no outstanding
-                * calls */
-               if (RB_EMPTY_ROOT(&rx->calls)) {
-                       if (copied)
-                               goto out;
-                       if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
-                               release_sock(&rx->sk);
-                               if (continue_call)
-                                       rxrpc_put_call(continue_call);
-                               return -ENODATA;
-                       }
-               }
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call)
+{
+       _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
 
-               /* get the next message on the Rx queue */
-               skb = skb_peek(&rx->sk.sk_receive_queue);
-               if (!skb) {
-                       /* nothing remains on the queue */
-                       if (copied &&
-                           (flags & MSG_PEEK || timeo == 0))
-                               goto out;
+       if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
+               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false);
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+       } else {
+               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false);
+       }
 
-                       /* wait for a message to turn up */
-                       release_sock(&rx->sk);
-                       prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
-                                                 TASK_INTERRUPTIBLE);
-                       ret = sock_error(&rx->sk);
-                       if (ret)
-                               goto wait_error;
-
-                       if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
-                               if (signal_pending(current))
-                                       goto wait_interrupted;
-                               timeo = schedule_timeout(timeo);
-                       }
-                       finish_wait(sk_sleep(&rx->sk), &wait);
-                       lock_sock(&rx->sk);
-                       continue;
-               }
+       write_lock_bh(&call->state_lock);
 
-       peek_next_packet:
-               sp = rxrpc_skb(skb);
-               call = sp->call;
-               ASSERT(call != NULL);
+       switch (call->state) {
+       case RXRPC_CALL_CLIENT_RECV_REPLY:
+               __rxrpc_call_completed(call);
+               break;
 
-               _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+       case RXRPC_CALL_SERVER_RECV_REQUEST:
+               call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+               break;
+       default:
+               break;
+       }
 
-               /* make sure we wait for the state to be updated in this call */
-               spin_lock_bh(&call->lock);
-               spin_unlock_bh(&call->lock);
+       write_unlock_bh(&call->state_lock);
+}
 
-               if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
-                       _debug("packet from released call");
-                       if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-                               BUG();
-                       rxrpc_free_skb(skb);
-                       continue;
-               }
+/*
+ * Discard a packet we've used up and advance the Rx window by one.
+ */
+static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
+{
+       struct sk_buff *skb;
+       rxrpc_seq_t hard_ack, top;
+       int ix;
 
-               /* determine whether to continue last data receive */
-               if (continue_call) {
-                       _debug("maybe cont");
-                       if (call != continue_call ||
-                           skb->mark != RXRPC_SKB_MARK_DATA) {
-                               release_sock(&rx->sk);
-                               rxrpc_put_call(continue_call);
-                               _leave(" = %d [noncont]", copied);
-                               return copied;
-                       }
-               }
+       _enter("%d", call->debug_id);
 
-               rxrpc_get_call(call);
+       hard_ack = call->rx_hard_ack;
+       top = smp_load_acquire(&call->rx_top);
+       ASSERT(before(hard_ack, top));
 
-               /* copy the peer address and timestamp */
-               if (!continue_call) {
-                       if (msg->msg_name) {
-                               size_t len =
-                                       sizeof(call->conn->params.peer->srx);
-                               memcpy(msg->msg_name,
-                                      &call->conn->params.peer->srx, len);
-                               msg->msg_namelen = len;
-                       }
-                       sock_recv_timestamp(msg, &rx->sk, skb);
-               }
+       hard_ack++;
+       ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
+       skb = call->rxtx_buffer[ix];
+       rxrpc_see_skb(skb);
+       call->rxtx_buffer[ix] = NULL;
+       call->rxtx_annotations[ix] = 0;
+       /* Barrier against rxrpc_input_data(). */
+       smp_store_release(&call->rx_hard_ack, hard_ack);
 
-               /* receive the message */
-               if (skb->mark != RXRPC_SKB_MARK_DATA)
-                       goto receive_non_data_message;
+       rxrpc_free_skb(skb);
 
-               _debug("recvmsg DATA #%u { %d, %d }",
-                      sp->hdr.seq, skb->len, sp->offset);
+       _debug("%u,%u,%lx", hard_ack, top, call->flags);
+       if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags))
+               rxrpc_end_rx_phase(call);
+}
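The rxtx ring is indexed with a power-of-two mask, so sequence numbers map onto slots modulo the window size. A minimal sketch, with the size assumed purely for illustration:

#define BUFF_SIZE 64			/* assumed; must be a power of two */
#define BUFF_MASK (BUFF_SIZE - 1)

static unsigned int slot_for_seq(unsigned int seq)
{
	return seq & BUFF_MASK;		/* equivalent to seq % BUFF_SIZE */
}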
 
-               if (!continue_call) {
-                       /* only set the control data once per recvmsg() */
-                       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-                                      ullen, &call->user_call_ID);
-                       if (ret < 0)
-                               goto copy_error;
-                       ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
-               }
+/*
+ * Decrypt and verify a (sub)packet.  The packet's length may be changed due to
+ * padding, but if this is the case, the packet length will be resident in the
+ * socket buffer.  Note that we can't modify the master skb info as the skb may
+ * be the home to multiple subpackets.
+ */
+static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+                              u8 annotation,
+                              unsigned int offset, unsigned int len)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       rxrpc_seq_t seq = sp->hdr.seq;
+       u16 cksum = sp->hdr.cksum;
 
-               ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-               ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-               call->rx_data_recv = sp->hdr.seq;
+       _enter("");
 
-               ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+       /* For all but the head jumbo subpacket, the security checksum is in a
+        * jumbo header immediately prior to the data.
+        */
+       if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) {
+               __be16 tmp;
+               if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0)
+                       BUG();
+               cksum = ntohs(tmp);
+               seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1;
+       }
 
-               offset = sp->offset;
-               copy = skb->len - offset;
-               if (copy > len - copied)
-                       copy = len - copied;
+       return call->conn->security->verify_packet(call, skb, offset, len,
+                                                  seq, cksum);
+}
 
-               ret = skb_copy_datagram_msg(skb, offset, msg, copy);
+/*
+ * Locate the data within a packet.  This is complicated by:
+ *
+ * (1) An skb may contain a jumbo packet - so we have to find the appropriate
+ *     subpacket.
+ *
+ * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
+ *     contains an extra header which includes the true length of the data,
+ *     excluding any encrypted padding.
+ */
+static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+                            u8 *_annotation,
+                            unsigned int *_offset, unsigned int *_len)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int offset = *_offset;
+       unsigned int len = *_len;
+       int ret;
+       u8 annotation = *_annotation;
+
+       if (offset > 0)
+               return 0;
+
+       /* Locate the subpacket */
+       offset = sp->offset;
+       len = skb->len - sp->offset;
+       if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) {
+               offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) *
+                          RXRPC_JUMBO_SUBPKTLEN);
+               len = (annotation & RXRPC_RX_ANNO_JLAST) ?
+                       skb->len - offset : RXRPC_JUMBO_SUBPKTLEN;
+       }
 
+       if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
+               ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
                if (ret < 0)
-                       goto copy_error;
+                       return ret;
+               *_annotation |= RXRPC_RX_ANNO_VERIFIED;
+       }
+
+       *_offset = offset;
+       *_len = len;
+       call->conn->security->locate_data(call, skb, _offset, _len);
+       return 0;
+}
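A worked example of the subpacket arithmetic, taking RXRPC_JUMBO_SUBPKTLEN to be 1416 (1412 data bytes plus a 4-byte jumbo header - an assumption made here for illustration):

/* Jumbo annotation n = 3 selects the third subpacket:
 *
 *	offset = sp->offset + (3 - 1) * 1416
 *	len    = 1416               if RXRPC_RX_ANNO_JLAST is clear
 *	len    = skb->len - offset  if RXRPC_RX_ANNO_JLAST is set
 */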
+
+/*
+ * Deliver messages to a call.  This keeps processing packets until the buffer
+ * is filled and we find either more DATA (returns 0) or the end of the DATA
+ * (returns 1).  If more packets are required, it returns -EAGAIN.
+ */
+static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
+                             struct msghdr *msg, struct iov_iter *iter,
+                             size_t len, int flags, size_t *_offset)
+{
+       struct rxrpc_skb_priv *sp;
+       struct sk_buff *skb;
+       rxrpc_seq_t hard_ack, top, seq;
+       size_t remain;
+       bool last;
+       unsigned int rx_pkt_offset, rx_pkt_len;
+       int ix, copy, ret = 0;
+
+       _enter("");
+
+       rx_pkt_offset = call->rx_pkt_offset;
+       rx_pkt_len = call->rx_pkt_len;
+
+       /* Barriers against rxrpc_input_data(). */
+       hard_ack = call->rx_hard_ack;
+       top = smp_load_acquire(&call->rx_top);
+       for (seq = hard_ack + 1; before_eq(seq, top); seq++) {
+               ix = seq & RXRPC_RXTX_BUFF_MASK;
+               skb = call->rxtx_buffer[ix];
+               if (!skb)
+                       break;
+               smp_rmb();
+               rxrpc_see_skb(skb);
+               sp = rxrpc_skb(skb);
 
-               /* handle piecemeal consumption of data packets */
-               _debug("copied %d+%d", copy, copied);
+               if (msg)
+                       sock_recv_timestamp(msg, sock->sk, skb);
+
+               ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix],
+                                       &rx_pkt_offset, &rx_pkt_len);
+               _debug("recvmsg %x DATA #%u { %d, %d }",
+                      sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len);
+
+               /* We have to handle short, empty and used-up DATA packets. */
+               remain = len - *_offset;
+               copy = rx_pkt_len;
+               if (copy > remain)
+                       copy = remain;
+               if (copy > 0) {
+                       ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
+                                                    copy);
+                       if (ret < 0)
+                               goto out;
 
-               offset += copy;
-               copied += copy;
+                       /* handle piecemeal consumption of data packets */
+                       _debug("copied %d @%zu", copy, *_offset);
 
-               if (!(flags & MSG_PEEK))
-                       sp->offset = offset;
+                       rx_pkt_offset += copy;
+                       rx_pkt_len -= copy;
+                       *_offset += copy;
+               }
 
-               if (sp->offset < skb->len) {
+               if (rx_pkt_len > 0) {
                        _debug("buffer full");
-                       ASSERTCMP(copied, ==, len);
+                       ASSERTCMP(*_offset, ==, len);
                        break;
                }
 
-               /* we transferred the whole data packet */
+               /* The whole packet has been transferred. */
+               last = sp->hdr.flags & RXRPC_LAST_PACKET;
                if (!(flags & MSG_PEEK))
-                       rxrpc_kernel_data_consumed(call, skb);
-
-               if (sp->hdr.flags & RXRPC_LAST_PACKET) {
-                       _debug("last");
-                       if (rxrpc_conn_is_client(call->conn)) {
-                                /* last byte of reply received */
-                               ret = copied;
-                               goto terminal_message;
-                       }
+                       rxrpc_rotate_rx_window(call);
+               rx_pkt_offset = 0;
+               rx_pkt_len = 0;
 
-                       /* last bit of request received */
-                       if (!(flags & MSG_PEEK)) {
-                               _debug("eat packet");
-                               if (skb_dequeue(&rx->sk.sk_receive_queue) !=
-                                   skb)
-                                       BUG();
-                               rxrpc_free_skb(skb);
-                       }
-                       msg->msg_flags &= ~MSG_MORE;
-                       break;
-               }
+               ASSERTIFCMP(last, seq, ==, top);
+       }
 
-               /* move on to the next data message */
-               _debug("next");
-               if (!continue_call)
-                       continue_call = sp->call;
-               else
-                       rxrpc_put_call(call);
-               call = NULL;
-
-               if (flags & MSG_PEEK) {
-                       _debug("peek next");
-                       skb = skb->next;
-                       if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
-                               break;
-                       goto peek_next_packet;
-               }
+       if (after(seq, top)) {
+               ret = -EAGAIN;
+               if (test_bit(RXRPC_CALL_RX_LAST, &call->flags))
+                       ret = 1;
+       }
+out:
+       if (!(flags & MSG_PEEK)) {
+               call->rx_pkt_offset = rx_pkt_offset;
+               call->rx_pkt_len = rx_pkt_len;
+       }
+       _leave(" = %d [%u/%u]", ret, seq, top);
+       return ret;
+}
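The tristate return convention is worth spelling out, since both recvmsg paths below depend on it:

/* Return value, as consumed by the callers:
 *
 *	 1       end of the call's data reached
 *	 0       buffer filled, more DATA still to come
 *	-EAGAIN  Rx window empty, wait for more packets
 *	<0       hard error (e.g. -EFAULT from the copy)
 */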
 
-               _debug("eat packet");
-               if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-                       BUG();
-               rxrpc_free_skb(skb);
+/*
+ * Receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ *   simultaneously
+ */
+int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+                 int flags)
+{
+       struct rxrpc_call *call;
+       struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+       struct list_head *l;
+       size_t copied = 0;
+       long timeo;
+       int ret;
+
+       DEFINE_WAIT(wait);
+
+       _enter(",,,%zu,%d", len, flags);
+
+       if (flags & (MSG_OOB | MSG_TRUNC))
+               return -EOPNOTSUPP;
+
+       timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+
+try_again:
+       lock_sock(&rx->sk);
+
+       /* Return immediately if a client socket has no outstanding calls */
+       if (RB_EMPTY_ROOT(&rx->calls) &&
+           list_empty(&rx->recvmsg_q) &&
+           rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+               release_sock(&rx->sk);
+               return -ENODATA;
        }
 
-       /* end of non-terminal data packet reception for the moment */
-       _debug("end rcv data");
-out:
-       release_sock(&rx->sk);
-       if (call)
-               rxrpc_put_call(call);
-       if (continue_call)
-               rxrpc_put_call(continue_call);
-       _leave(" = %d [data]", copied);
-       return copied;
-
-       /* handle non-DATA messages such as aborts, incoming connections and
-        * final ACKs */
-receive_non_data_message:
-       _debug("non-data");
-
-       if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
-               _debug("RECV NEW CALL");
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
-               if (ret < 0)
-                       goto copy_error;
-               if (!(flags & MSG_PEEK)) {
-                       if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-                               BUG();
-                       rxrpc_free_skb(skb);
+       if (list_empty(&rx->recvmsg_q)) {
+               ret = -EWOULDBLOCK;
+               if (timeo == 0)
+                       goto error_no_call;
+
+               release_sock(&rx->sk);
+
+               /* Wait for something to happen */
+               prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
+                                         TASK_INTERRUPTIBLE);
+               ret = sock_error(&rx->sk);
+               if (ret)
+                       goto wait_error;
+
+               if (list_empty(&rx->recvmsg_q)) {
+                       if (signal_pending(current))
+                               goto wait_interrupted;
+                       timeo = schedule_timeout(timeo);
                }
-               goto out;
+               finish_wait(sk_sleep(&rx->sk), &wait);
+               goto try_again;
        }
 
-       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-                      ullen, &call->user_call_ID);
-       if (ret < 0)
-               goto copy_error;
-       ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
-
-       switch (skb->mark) {
-       case RXRPC_SKB_MARK_DATA:
+       /* Find the next call and dequeue it if we're not just peeking.  If we
+        * do dequeue it, that comes with a ref that we will need to release.
+        */
+       write_lock_bh(&rx->recvmsg_lock);
+       l = rx->recvmsg_q.next;
+       call = list_entry(l, struct rxrpc_call, recvmsg_link);
+       if (!(flags & MSG_PEEK))
+               list_del_init(&call->recvmsg_link);
+       else
+               rxrpc_get_call(call, rxrpc_call_got);
+       write_unlock_bh(&rx->recvmsg_lock);
+
+       _debug("recvmsg call %p", call);
+
+       if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
                BUG();
-       case RXRPC_SKB_MARK_FINAL_ACK:
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
-               break;
-       case RXRPC_SKB_MARK_BUSY:
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
-               break;
-       case RXRPC_SKB_MARK_REMOTE_ABORT:
-               abort_code = call->remote_abort;
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
-               break;
-       case RXRPC_SKB_MARK_LOCAL_ABORT:
-               abort_code = call->local_abort;
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
-               break;
-       case RXRPC_SKB_MARK_NET_ERROR:
-               _debug("RECV NET ERROR %d", sp->error);
-               abort_code = sp->error;
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+
+       if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+               if (flags & MSG_CMSG_COMPAT) {
+                       unsigned int id32 = call->user_call_ID;
+
+                       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+                                      sizeof(unsigned int), &id32);
+               } else {
+                       ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+                                      sizeof(unsigned long),
+                                      &call->user_call_ID);
+               }
+               if (ret < 0)
+                       goto error;
+       }
+
+       if (msg->msg_name) {
+               size_t len = sizeof(call->conn->params.peer->srx);
+               memcpy(msg->msg_name, &call->conn->params.peer->srx, len);
+               msg->msg_namelen = len;
+       }
+
+       switch (call->state) {
+       case RXRPC_CALL_SERVER_ACCEPTING:
+               ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
                break;
-       case RXRPC_SKB_MARK_LOCAL_ERROR:
-               _debug("RECV LOCAL ERROR %d", sp->error);
-               abort_code = sp->error;
-               ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
-                              &abort_code);
+       case RXRPC_CALL_CLIENT_RECV_REPLY:
+       case RXRPC_CALL_SERVER_RECV_REQUEST:
+       case RXRPC_CALL_SERVER_ACK_REQUEST:
+               ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+                                        flags, &copied);
+               if (ret == -EAGAIN)
+                       ret = 0;
+
+               if (after(call->rx_top, call->rx_hard_ack) &&
+                   call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK])
+                       rxrpc_notify_socket(call);
                break;
        default:
-               pr_err("Unknown packet mark %u\n", skb->mark);
-               BUG();
+               ret = 0;
                break;
        }
 
        if (ret < 0)
-               goto copy_error;
-
-terminal_message:
-       _debug("terminal");
-       msg->msg_flags &= ~MSG_MORE;
-       msg->msg_flags |= MSG_EOR;
+               goto error;
 
-       if (!(flags & MSG_PEEK)) {
-               _net("free terminal skb %p", skb);
-               if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-                       BUG();
-               rxrpc_free_skb(skb);
-               rxrpc_remove_user_ID(rx, call);
+       if (call->state == RXRPC_CALL_COMPLETE) {
+               ret = rxrpc_recvmsg_term(call, msg);
+               if (ret < 0)
+                       goto error;
+               if (!(flags & MSG_PEEK))
+                       rxrpc_release_call(rx, call);
+               msg->msg_flags |= MSG_EOR;
+               ret = 1;
        }
 
-       release_sock(&rx->sk);
-       rxrpc_put_call(call);
-       if (continue_call)
-               rxrpc_put_call(continue_call);
-       _leave(" = %d", ret);
-       return ret;
+       if (ret == 0)
+               msg->msg_flags |= MSG_MORE;
+       else
+               msg->msg_flags &= ~MSG_MORE;
+       ret = copied;
 
-copy_error:
-       _debug("copy error");
+error:
+       rxrpc_put_call(call, rxrpc_call_put);
+error_no_call:
        release_sock(&rx->sk);
-       rxrpc_put_call(call);
-       if (continue_call)
-               rxrpc_put_call(continue_call);
        _leave(" = %d", ret);
        return ret;
 
@@ -353,65 +504,111 @@ wait_interrupted:
        ret = sock_intr_errno(timeo);
 wait_error:
        finish_wait(sk_sleep(&rx->sk), &wait);
-       if (continue_call)
-               rxrpc_put_call(continue_call);
-       if (copied)
-               copied = ret;
-       _leave(" = %d [waitfail %d]", copied, ret);
-       return copied;
-
+       release_sock(&rx->sk);
+       _leave(" = %d [wait]", ret);
+       return ret;
 }
 
 /**
- * rxrpc_kernel_is_data_last - Determine if data message is last one
- * @skb: Message holding data
+ * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
+ * @sock: The socket that the call exists on
+ * @call: The call to receive data from
+ * @buf: The buffer to receive into
+ * @size: The size of the buffer, including data already read
+ * @_offset: The running offset into the buffer.
+ * @want_more: True if more data is expected to be read
+ * @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ *
+ * Allow a kernel service to receive data and pick up information about the
+ * state of a call.  Returns 0 if we got what was asked for and there's more
+ * available, 1 if we got what was asked for and we're at the end of the data,
+ * and -EAGAIN if we need more data.
+ *
+ * Note that we may return -EAGAIN to drain empty packets at the end of the
+ * data, even if we've already copied over the requested data.
  *
- * Determine if data message is last one for the parent call.
+ * This function adds the amount it transfers to *_offset, so this should be
+ * precleared as appropriate.  Note that the amount remaining in the buffer is
+ * taken to be size - *_offset.
+ *
+ * *_abort should also be initialised to 0.
  */
-bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
+                          void *buf, size_t size, size_t *_offset,
+                          bool want_more, u32 *_abort)
 {
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       struct iov_iter iter;
+       struct kvec iov;
+       int ret;
 
-       ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+       _enter("{%d,%s},%zu/%zu,%d",
+              call->debug_id, rxrpc_call_states[call->state],
+              *_offset, size, want_more);
 
-       return sp->hdr.flags & RXRPC_LAST_PACKET;
-}
+       ASSERTCMP(*_offset, <=, size);
+       ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
 
-EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+       iov.iov_base = buf + *_offset;
+       iov.iov_len = size - *_offset;
+       iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
 
-/**
- * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
- * @skb: Message indicating an abort
- *
- * Get the abort code from an RxRPC abort message.
- */
-u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
-{
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       lock_sock(sock->sk);
+
+       switch (call->state) {
+       case RXRPC_CALL_CLIENT_RECV_REPLY:
+       case RXRPC_CALL_SERVER_RECV_REQUEST:
+       case RXRPC_CALL_SERVER_ACK_REQUEST:
+               ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
+                                        _offset);
+               if (ret < 0)
+                       goto out;
+
+               /* We can only reach here with a partially full buffer if we
+                * have reached the end of the data.  We must otherwise have a
+                * full buffer or have been given -EAGAIN.
+                */
+               if (ret == 1) {
+                       if (*_offset < size)
+                               goto short_data;
+                       if (!want_more)
+                               goto read_phase_complete;
+                       ret = 0;
+                       goto out;
+               }
+
+               if (!want_more)
+                       goto excess_data;
+               goto out;
+
+       case RXRPC_CALL_COMPLETE:
+               goto call_complete;
 
-       switch (skb->mark) {
-       case RXRPC_SKB_MARK_REMOTE_ABORT:
-               return sp->call->remote_abort;
-       case RXRPC_SKB_MARK_LOCAL_ABORT:
-               return sp->call->local_abort;
        default:
-               BUG();
+               ret = -EINPROGRESS;
+               goto out;
        }
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
 
-/**
- * rxrpc_kernel_get_error - Get the error number from an RxRPC error message
- * @skb: Message indicating an error
- *
- * Get the error number from an RxRPC error message.
- */
-int rxrpc_kernel_get_error_number(struct sk_buff *skb)
-{
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+read_phase_complete:
+       ret = 1;
+out:
+       release_sock(sock->sk);
+       _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+       return ret;
 
-       return sp->error;
+short_data:
+       ret = -EBADMSG;
+       goto out;
+excess_data:
+       ret = -EMSGSIZE;
+       goto out;
+call_complete:
+       *_abort = call->abort_code;
+       ret = call->error;
+       if (call->completion == RXRPC_CALL_SUCCEEDED) {
+               ret = 1;
+               if (size > 0)
+                       ret = -ECONNRESET;
+       }
+       goto out;
 }
-
-EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
+EXPORT_SYMBOL(rxrpc_kernel_recv_data);
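A hypothetical kernel-service read of a complete reply, as a sketch. Both the running offset and the abort code must start at zero; a real service would sleep and retry from its notification handler rather than call in a loop:

static int read_whole_reply(struct socket *sock, struct rxrpc_call *call,
			    void *buf, size_t size)
{
	size_t offset = 0;
	u32 abort_code = 0;
	int ret;

	ret = rxrpc_kernel_recv_data(sock, call, buf, size, &offset,
				     false, &abort_code);
	if (ret == -ECONNABORTED)
		pr_warn("call aborted, code %u\n", abort_code);
	return ret;	/* 1 here means all data received, call complete */
}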
index 63afa9e..ae39255 100644
@@ -275,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
        /* calculate the security checksum */
-       x = call->channel << (32 - RXRPC_CIDSHIFT);
+       x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
        x |= sp->hdr.seq & 0x3fffffff;
        call->crypto_buf[0] = htonl(sp->hdr.callNumber);
        call->crypto_buf[1] = htonl(x);
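The corrected line packs the channel number into the bits above a 30-bit sequence number (with RXRPC_CIDSHIFT being 2, the shift works out to 30). Schematically:

/* chan = cid & RXRPC_CHANNELMASK	(the two low bits of the CID)
 *
 *	x = (chan << 30) | (seq & 0x3fffffff);
 *
 * This word, together with the call number, feeds the rxkad security
 * checksum for each packet.
 */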
@@ -316,12 +316,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
 /*
  * decrypt partial encryption on a packet (level 1 security)
  */
-static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
-                                   struct sk_buff *skb,
-                                   u32 *_abort_code)
+static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
+                                unsigned int offset, unsigned int len,
+                                rxrpc_seq_t seq)
 {
        struct rxkad_level1_hdr sechdr;
-       struct rxrpc_skb_priv *sp;
        SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist sg[16];
@@ -332,15 +331,20 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
 
        _enter("");
 
-       sp = rxrpc_skb(skb);
+       if (len < 8) {
+               rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+               goto protocol_error;
+       }
 
-       /* we want to decrypt the skbuff in-place */
+       /* Decrypt the skbuff in-place.  TODO: We really want to decrypt
+        * directly into the target buffer.
+        */
        nsg = skb_cow_data(skb, 0, &trailer);
        if (nsg < 0 || nsg > 16)
                goto nomem;
 
        sg_init_table(sg, nsg);
-       skb_to_sgvec(skb, sg, 0, 8);
+       skb_to_sgvec(skb, sg, offset, 8);
 
        /* start the decryption afresh */
        memset(&iv, 0, sizeof(iv));
@@ -351,35 +355,35 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
        crypto_skcipher_decrypt(req);
        skcipher_request_zero(req);
 
-       /* remove the decrypted packet length */
-       if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
-               goto datalen_error;
-       if (!skb_pull(skb, sizeof(sechdr)))
-               BUG();
+       /* Extract the decrypted packet length */
+       if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+               rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+               goto protocol_error;
+       }
+       offset += sizeof(sechdr);
+       len -= sizeof(sechdr);
 
        buf = ntohl(sechdr.data_size);
        data_size = buf & 0xffff;
 
        check = buf >> 16;
-       check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+       check ^= seq ^ call->call_id;
        check &= 0xffff;
        if (check != 0) {
-               *_abort_code = RXKADSEALEDINCON;
+               rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
                goto protocol_error;
        }
 
-       /* shorten the packet to remove the padding */
-       if (data_size > skb->len)
-               goto datalen_error;
-       else if (data_size < skb->len)
-               skb->len = data_size;
+       if (data_size > len) {
+               rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+               goto protocol_error;
+       }
 
        _leave(" = 0 [dlen=%x]", data_size);
        return 0;
 
-datalen_error:
-       *_abort_code = RXKADDATALEN;
 protocol_error:
+       rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
        _leave(" = -EPROTO");
        return -EPROTO;
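
The decrypted header word above carries a 16-bit check in its high half and
the data size in its low half.  A standalone rendering with invented values,
chosen so the check comes out clean:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t buf = 0xbeef0040;  /* invented decrypted header word */
    uint32_t seq = 3;           /* invented packet sequence */
    uint32_t call_id = 0xbeec;  /* invented call ID */
    uint32_t data_size = buf & 0xffff;
    uint32_t check = ((buf >> 16) ^ seq ^ call_id) & 0xffff;

    /* A nonzero check is what triggers RXKADSEALEDINCON above. */
    printf("data_size=%u check=0x%04x\n", data_size, check);  /* 64, 0x0000 */
    return 0;
}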
 
@@ -391,13 +395,12 @@ nomem:
 /*
  * wholly decrypt a packet (level 2 security)
  */
-static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
-                                      struct sk_buff *skb,
-                                      u32 *_abort_code)
+static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
+                                unsigned int offset, unsigned int len,
+                                rxrpc_seq_t seq)
 {
        const struct rxrpc_key_token *token;
        struct rxkad_level2_hdr sechdr;
-       struct rxrpc_skb_priv *sp;
        SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
        struct rxrpc_crypt iv;
        struct scatterlist _sg[4], *sg;
@@ -408,9 +411,14 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
 
        _enter(",{%d}", skb->len);
 
-       sp = rxrpc_skb(skb);
+       if (len < 8) {
+               rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+               goto protocol_error;
+       }
 
-       /* we want to decrypt the skbuff in-place */
+       /* Decrypt the skbuff in-place.  TODO: We really want to decrypt
+        * directly into the target buffer.
+        */
        nsg = skb_cow_data(skb, 0, &trailer);
        if (nsg < 0)
                goto nomem;
@@ -423,7 +431,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
        }
 
        sg_init_table(sg, nsg);
-       skb_to_sgvec(skb, sg, 0, skb->len);
+       skb_to_sgvec(skb, sg, offset, len);
 
        /* decrypt from the session key */
        token = call->conn->params.key->payload.data[0];
@@ -431,41 +439,41 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
 
        skcipher_request_set_tfm(req, call->conn->cipher);
        skcipher_request_set_callback(req, 0, NULL, NULL);
-       skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
+       skcipher_request_set_crypt(req, sg, sg, len, iv.x);
        crypto_skcipher_decrypt(req);
        skcipher_request_zero(req);
        if (sg != _sg)
                kfree(sg);
 
-       /* remove the decrypted packet length */
-       if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
-               goto datalen_error;
-       if (!skb_pull(skb, sizeof(sechdr)))
-               BUG();
+       /* Extract the decrypted packet length */
+       if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+               rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+               goto protocol_error;
+       }
+       offset += sizeof(sechdr);
+       len -= sizeof(sechdr);
 
        buf = ntohl(sechdr.data_size);
        data_size = buf & 0xffff;
 
        check = buf >> 16;
-       check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+       check ^= seq ^ call->call_id;
        check &= 0xffff;
        if (check != 0) {
-               *_abort_code = RXKADSEALEDINCON;
+               rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
                goto protocol_error;
        }
 
-       /* shorten the packet to remove the padding */
-       if (data_size > skb->len)
-               goto datalen_error;
-       else if (data_size < skb->len)
-               skb->len = data_size;
+       if (data_size > len) {
+               rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+               goto protocol_error;
+       }
 
        _leave(" = 0 [dlen=%x]", data_size);
        return 0;
 
-datalen_error:
-       *_abort_code = RXKADDATALEN;
 protocol_error:
+       rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
        _leave(" = -EPROTO");
        return -EPROTO;
 
@@ -475,40 +483,31 @@ nomem:
 }
 
 /*
- * verify the security on a received packet
+ * Verify the security on a received packet or subpacket (if part of a
+ * jumbo packet).
  */
-static int rxkad_verify_packet(struct rxrpc_call *call,
-                              struct sk_buff *skb,
-                              u32 *_abort_code)
+static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+                              unsigned int offset, unsigned int len,
+                              rxrpc_seq_t seq, u16 expected_cksum)
 {
        SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
-       struct rxrpc_skb_priv *sp;
        struct rxrpc_crypt iv;
        struct scatterlist sg;
        u16 cksum;
        u32 x, y;
-       int ret;
-
-       sp = rxrpc_skb(skb);
 
        _enter("{%d{%x}},{#%u}",
-              call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq);
+              call->debug_id, key_serial(call->conn->params.key), seq);
 
        if (!call->conn->cipher)
                return 0;
 
-       if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) {
-               *_abort_code = RXKADINCONSISTENCY;
-               _leave(" = -EPROTO [not rxkad]");
-               return -EPROTO;
-       }
-
        /* continue encrypting from where we left off */
        memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
        /* validate the security checksum */
-       x = call->channel << (32 - RXRPC_CIDSHIFT);
-       x |= sp->hdr.seq & 0x3fffffff;
+       x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+       x |= seq & 0x3fffffff;
        call->crypto_buf[0] = htonl(call->call_id);
        call->crypto_buf[1] = htonl(x);
 
@@ -524,29 +523,69 @@ static int rxkad_verify_packet(struct rxrpc_call *call,
        if (cksum == 0)
                cksum = 1; /* zero checksums are not permitted */
 
-       if (sp->hdr.cksum != cksum) {
-               *_abort_code = RXKADSEALEDINCON;
+       if (cksum != expected_cksum) {
+               rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
                _leave(" = -EPROTO [csum failed]");
                return -EPROTO;
        }
 
        switch (call->conn->params.security_level) {
        case RXRPC_SECURITY_PLAIN:
-               ret = 0;
-               break;
+               return 0;
        case RXRPC_SECURITY_AUTH:
-               ret = rxkad_verify_packet_auth(call, skb, _abort_code);
-               break;
+               return rxkad_verify_packet_1(call, skb, offset, len, seq);
        case RXRPC_SECURITY_ENCRYPT:
-               ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
-               break;
+               return rxkad_verify_packet_2(call, skb, offset, len, seq);
        default:
-               ret = -ENOANO;
-               break;
+               return -ENOANO;
        }
+}
 
-       _leave(" = %d", ret);
-       return ret;
+/*
+ * Locate the data contained in a packet that was partially encrypted.
+ */
+static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb,
+                               unsigned int *_offset, unsigned int *_len)
+{
+       struct rxkad_level1_hdr sechdr;
+
+       if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+               BUG();
+       *_offset += sizeof(sechdr);
+       *_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in a packet that was completely encrypted.
+ */
+static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb,
+                               unsigned int *_offset, unsigned int *_len)
+{
+       struct rxkad_level2_hdr sechdr;
+
+       if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+               BUG();
+       *_offset += sizeof(sechdr);
+       *_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in an already decrypted packet.
+ */
+static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+                             unsigned int *_offset, unsigned int *_len)
+{
+       switch (call->conn->params.security_level) {
+       case RXRPC_SECURITY_AUTH:
+               rxkad_locate_data_1(call, skb, _offset, _len);
+               return;
+       case RXRPC_SECURITY_ENCRYPT:
+               rxkad_locate_data_2(call, skb, _offset, _len);
+               return;
+       default:
+               return;
+       }
 }
 
 /*
@@ -716,7 +755,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
        struct rxkad_challenge challenge;
        struct rxkad_response resp
                __attribute__((aligned(8))); /* must be aligned for crypto */
-       struct rxrpc_skb_priv *sp;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        u32 version, nonce, min_level, abort_code;
        int ret;
 
@@ -734,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
        }
 
        abort_code = RXKADPACKETSHORT;
-       sp = rxrpc_skb(skb);
-       if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+       if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0)
                goto protocol_error;
 
        version = ntohl(challenge.version);
@@ -981,7 +1019,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 {
        struct rxkad_response response
                __attribute__((aligned(8))); /* must be aligned for crypto */
-       struct rxrpc_skb_priv *sp;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        struct rxrpc_crypt session_key;
        time_t expiry;
        void *ticket;
@@ -992,7 +1030,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
        _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
 
        abort_code = RXKADPACKETSHORT;
-       if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+       if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0)
                goto protocol_error;
        if (!pskb_pull(skb, sizeof(response)))
                BUG();
@@ -1000,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
        version = ntohl(response.version);
        ticket_len = ntohl(response.ticket_len);
        kvno = ntohl(response.kvno);
-       sp = rxrpc_skb(skb);
        _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
               sp->hdr.serial, version, kvno, ticket_len);
 
@@ -1022,7 +1059,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
                return -ENOMEM;
 
        abort_code = RXKADPACKETSHORT;
-       if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+       if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0)
                goto protocol_error_free;
 
        ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
@@ -1147,6 +1184,7 @@ const struct rxrpc_security rxkad = {
        .prime_packet_security          = rxkad_prime_packet_security,
        .secure_packet                  = rxkad_secure_packet,
        .verify_packet                  = rxkad_verify_packet,
+       .locate_data                    = rxkad_locate_data,
        .issue_challenge                = rxkad_issue_challenge,
        .respond_to_challenge           = rxkad_respond_to_challenge,
        .verify_response                = rxkad_verify_response,
index 814d285..82d8134 100644 (file)
@@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
        }
 
        /* find the service */
-       read_lock_bh(&local->services_lock);
-       list_for_each_entry(rx, &local->services, listen_link) {
+       read_lock(&local->services_lock);
+       hlist_for_each_entry(rx, &local->services, listen_link) {
                if (rx->srx.srx_service == conn->params.service_id)
                        goto found_service;
        }
 
        /* the service appears to have died */
-       read_unlock_bh(&local->services_lock);
+       read_unlock(&local->services_lock);
        _leave(" = -ENOENT");
        return -ENOENT;
 
 found_service:
        if (!rx->securities) {
-               read_unlock_bh(&local->services_lock);
+               read_unlock(&local->services_lock);
                _leave(" = -ENOKEY");
                return -ENOKEY;
        }
@@ -152,13 +152,13 @@ found_service:
        kref = keyring_search(make_key_ref(rx->securities, 1UL),
                              &key_type_rxrpc_s, kdesc);
        if (IS_ERR(kref)) {
-               read_unlock_bh(&local->services_lock);
+               read_unlock(&local->services_lock);
                _leave(" = %ld [search]", PTR_ERR(kref));
                return PTR_ERR(kref);
        }
 
        key = key_ref_to_ptr(kref);
-       read_unlock_bh(&local->services_lock);
+       read_unlock(&local->services_lock);
 
        conn->server_key = key;
        conn->security = sec;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
new file mode 100644 (file)
index 0000000..cba2365
--- /dev/null
@@ -0,0 +1,616 @@
+/* AF_RXRPC sendmsg() implementation.
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+enum rxrpc_command {
+       RXRPC_CMD_SEND_DATA,            /* send data message */
+       RXRPC_CMD_SEND_ABORT,           /* request abort generation */
+       RXRPC_CMD_ACCEPT,               /* [server] accept incoming call */
+       RXRPC_CMD_REJECT_BUSY,          /* [server] reject a call as busy */
+};
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+                                   struct rxrpc_call *call,
+                                   long *timeo)
+{
+       DECLARE_WAITQUEUE(myself, current);
+       int ret;
+
+       _enter(",{%u,%u,%u}",
+              call->tx_hard_ack, call->tx_top, call->tx_winsize);
+
+       add_wait_queue(&call->waitq, &myself);
+
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               ret = 0;
+               if (call->tx_top - call->tx_hard_ack < call->tx_winsize)
+                       break;
+               if (call->state >= RXRPC_CALL_COMPLETE) {
+                       ret = -call->error;
+                       break;
+               }
+               if (signal_pending(current)) {
+                       ret = sock_intr_errno(*timeo);
+                       break;
+               }
+
+               release_sock(&rx->sk);
+               *timeo = schedule_timeout(*timeo);
+               lock_sock(&rx->sk);
+       }
+
+       remove_wait_queue(&call->waitq, &myself);
+       set_current_state(TASK_RUNNING);
+       _leave(" = %d", ret);
+       return ret;
+}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
+{
+       spin_lock_bh(&call->lock);
+
+       if (call->state < RXRPC_CALL_COMPLETE) {
+               call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+               if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+                       rxrpc_queue_call(call);
+       }
+
+       spin_unlock_bh(&call->lock);
+}
+
+/*
+ * Queue a DATA packet for transmission, set the resend timeout and send the
+ * packet immediately
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+                              bool last)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       rxrpc_seq_t seq = sp->hdr.seq;
+       int ret, ix;
+
+       _net("queue skb %p [%d]", skb, seq);
+
+       ASSERTCMP(seq, ==, call->tx_top + 1);
+
+       ix = seq & RXRPC_RXTX_BUFF_MASK;
+       rxrpc_get_skb(skb);
+       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK;
+       smp_wmb();
+       call->rxtx_buffer[ix] = skb;
+       call->tx_top = seq;
+       if (last)
+               set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+
+       if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+               _debug("________awaiting reply/ACK__________");
+               write_lock_bh(&call->state_lock);
+               switch (call->state) {
+               case RXRPC_CALL_CLIENT_SEND_REQUEST:
+                       call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+                       break;
+               case RXRPC_CALL_SERVER_ACK_REQUEST:
+                       call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+                       if (!last)
+                               break;
+               case RXRPC_CALL_SERVER_SEND_REPLY:
+                       call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+                       break;
+               default:
+                       break;
+               }
+               write_unlock_bh(&call->state_lock);
+       }
+
+       _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
+
+       if (seq == 1 && rxrpc_is_client_call(call))
+               rxrpc_expose_client_call(call);
+
+       sp->resend_at = jiffies + rxrpc_resend_timeout;
+       ret = rxrpc_send_data_packet(call->conn, skb);
+       if (ret < 0) {
+               _debug("need instant resend %d", ret);
+               rxrpc_instant_resend(call, ix);
+       }
+
+       rxrpc_free_skb(skb);
+       _leave("");
+}
+
+/*
+ * Convert a host-endian header into a network-endian header.
+ */
+static void rxrpc_insert_header(struct sk_buff *skb)
+{
+       struct rxrpc_wire_header whdr;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+       whdr.epoch      = htonl(sp->hdr.epoch);
+       whdr.cid        = htonl(sp->hdr.cid);
+       whdr.callNumber = htonl(sp->hdr.callNumber);
+       whdr.seq        = htonl(sp->hdr.seq);
+       whdr.serial     = htonl(sp->hdr.serial);
+       whdr.type       = sp->hdr.type;
+       whdr.flags      = sp->hdr.flags;
+       whdr.userStatus = sp->hdr.userStatus;
+       whdr.securityIndex = sp->hdr.securityIndex;
+       whdr._rsvd      = htons(sp->hdr._rsvd);
+       whdr.serviceId  = htons(sp->hdr.serviceId);
+
+       memcpy(skb->head, &whdr, sizeof(whdr));
+}
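
A standalone sketch of why each multi-byte field above passes through
htonl()/htons() before the memcpy(): the wire format is big-endian whatever
the host order.  The two-field struct is a stand-in, not the real
rxrpc_wire_header.

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <stdint.h>

struct demo_hdr {        /* stand-in for the wire header */
    uint32_t seq;
    uint16_t serviceId;
} __attribute__((packed));

int main(void)
{
    struct demo_hdr whdr = {
        .seq       = htonl(0x01020304),
        .serviceId = htons(0x0a0b),
    };
    unsigned char buf[sizeof(whdr)];
    size_t i;

    memcpy(buf, &whdr, sizeof(whdr));
    for (i = 0; i < sizeof(buf); i++)
        printf("%02x ", buf[i]);   /* 01 02 03 04 0a 0b on any host */
    printf("\n");
    return 0;
}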
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ */
+static int rxrpc_send_data(struct rxrpc_sock *rx,
+                          struct rxrpc_call *call,
+                          struct msghdr *msg, size_t len)
+{
+       struct rxrpc_skb_priv *sp;
+       struct sk_buff *skb;
+       struct sock *sk = &rx->sk;
+       long timeo;
+       bool more;
+       int ret, copied;
+
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+       /* this should be in poll */
+       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+       if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+               return -EPIPE;
+
+       more = msg->msg_flags & MSG_MORE;
+
+       skb = call->tx_pending;
+       call->tx_pending = NULL;
+       rxrpc_see_skb(skb);
+
+       copied = 0;
+       do {
+               if (!skb) {
+                       size_t size, chunk, max, space;
+
+                       _debug("alloc");
+
+                       if (call->tx_top - call->tx_hard_ack >=
+                           call->tx_winsize) {
+                               ret = -EAGAIN;
+                               if (msg->msg_flags & MSG_DONTWAIT)
+                                       goto maybe_error;
+                               ret = rxrpc_wait_for_tx_window(rx, call,
+                                                              &timeo);
+                               if (ret < 0)
+                                       goto maybe_error;
+                       }
+
+                       max = call->conn->params.peer->maxdata;
+                       max -= call->conn->security_size;
+                       max &= ~(call->conn->size_align - 1UL);
+
+                       chunk = max;
+                       if (chunk > msg_data_left(msg) && !more)
+                               chunk = msg_data_left(msg);
+
+                       space = chunk + call->conn->size_align;
+                       space &= ~(call->conn->size_align - 1UL);
+
+                       size = space + call->conn->header_size;
+
+                       _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+                       /* create a buffer that we can retain until it's ACK'd */
+                       skb = sock_alloc_send_skb(
+                               sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+                       if (!skb)
+                               goto maybe_error;
+
+                       rxrpc_new_skb(skb);
+
+                       _debug("ALLOC SEND %p", skb);
+
+                       ASSERTCMP(skb->mark, ==, 0);
+
+                       _debug("HS: %u", call->conn->header_size);
+                       skb_reserve(skb, call->conn->header_size);
+                       skb->len += call->conn->header_size;
+
+                       sp = rxrpc_skb(skb);
+                       sp->remain = chunk;
+                       if (sp->remain > skb_tailroom(skb))
+                               sp->remain = skb_tailroom(skb);
+
+                       _net("skb: hr %d, tr %d, hl %d, rm %d",
+                              skb_headroom(skb),
+                              skb_tailroom(skb),
+                              skb_headlen(skb),
+                              sp->remain);
+
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+               }
+
+               _debug("append");
+               sp = rxrpc_skb(skb);
+
+               /* append next segment of data to the current buffer */
+               if (msg_data_left(msg) > 0) {
+                       int copy = skb_tailroom(skb);
+                       ASSERTCMP(copy, >, 0);
+                       if (copy > msg_data_left(msg))
+                               copy = msg_data_left(msg);
+                       if (copy > sp->remain)
+                               copy = sp->remain;
+
+                       _debug("add");
+                       ret = skb_add_data(skb, &msg->msg_iter, copy);
+                       _debug("added");
+                       if (ret < 0)
+                               goto efault;
+                       sp->remain -= copy;
+                       skb->mark += copy;
+                       copied += copy;
+               }
+
+               /* check for the far side aborting the call or a network error
+                * occurring */
+               if (call->state == RXRPC_CALL_COMPLETE)
+                       goto call_terminated;
+
+               /* add the packet to the send queue if it's now full */
+               if (sp->remain <= 0 ||
+                   (msg_data_left(msg) == 0 && !more)) {
+                       struct rxrpc_connection *conn = call->conn;
+                       uint32_t seq;
+                       size_t pad;
+
+                       /* pad out if we're using security */
+                       if (conn->security_ix) {
+                               pad = conn->security_size + skb->mark;
+                               pad = conn->size_align - pad;
+                               pad &= conn->size_align - 1;
+                               _debug("pad %zu", pad);
+                               if (pad)
+                                       memset(skb_put(skb, pad), 0, pad);
+                       }
+
+                       seq = call->tx_top + 1;
+
+                       sp->hdr.epoch   = conn->proto.epoch;
+                       sp->hdr.cid     = call->cid;
+                       sp->hdr.callNumber = call->call_id;
+                       sp->hdr.seq     = seq;
+                       sp->hdr.serial  = atomic_inc_return(&conn->serial);
+                       sp->hdr.type    = RXRPC_PACKET_TYPE_DATA;
+                       sp->hdr.userStatus = 0;
+                       sp->hdr.securityIndex = call->security_ix;
+                       sp->hdr._rsvd   = 0;
+                       sp->hdr.serviceId = call->service_id;
+
+                       sp->hdr.flags = conn->out_clientflag;
+                       if (msg_data_left(msg) == 0 && !more)
+                               sp->hdr.flags |= RXRPC_LAST_PACKET;
+                       else if (call->tx_top - call->tx_hard_ack <
+                                call->tx_winsize)
+                               sp->hdr.flags |= RXRPC_MORE_PACKETS;
+                       if (more && seq & 1)
+                               sp->hdr.flags |= RXRPC_REQUEST_ACK;
+
+                       ret = conn->security->secure_packet(
+                               call, skb, skb->mark,
+                               skb->head + sizeof(struct rxrpc_wire_header));
+                       if (ret < 0)
+                               goto out;
+
+                       rxrpc_insert_header(skb);
+                       rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+                       skb = NULL;
+               }
+       } while (msg_data_left(msg) > 0);
+
+success:
+       ret = copied;
+out:
+       call->tx_pending = skb;
+       _leave(" = %d", ret);
+       return ret;
+
+call_terminated:
+       rxrpc_free_skb(skb);
+       _leave(" = %d", -call->error);
+       return -call->error;
+
+maybe_error:
+       if (copied)
+               goto success;
+       goto out;
+
+efault:
+       ret = -EFAULT;
+       goto out;
+}
+
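A worked example of the buffer sizing in rxrpc_send_data() above, using
invented connection parameters (1412 bytes of usable peer MTU, 8 bytes of
security overhead, 8-byte alignment, a 36-byte header):

#include <stdio.h>

int main(void)
{
    /* All four parameters are invented for illustration. */
    unsigned long maxdata = 1412, security_size = 8;
    unsigned long size_align = 8, header_size = 36;
    unsigned long max, chunk, space, size;

    max = maxdata - security_size;      /* 1404 */
    max &= ~(size_align - 1UL);         /* 1400, rounded down */

    chunk = max;                        /* one full segment of user data */
    space = (chunk + size_align) & ~(size_align - 1UL);  /* 1408 */
    size = space + header_size;         /* 1444 bytes to allocate */

    printf("chunk=%lu space=%lu size=%lu\n", chunk, space, size);
    return 0;
}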
+/*
+ * extract control messages from the sendmsg() control buffer
+ */
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
+                             unsigned long *user_call_ID,
+                             enum rxrpc_command *command,
+                             u32 *abort_code,
+                             bool *_exclusive)
+{
+       struct cmsghdr *cmsg;
+       bool got_user_ID = false;
+       int len;
+
+       *command = RXRPC_CMD_SEND_DATA;
+
+       if (msg->msg_controllen == 0)
+               return -EINVAL;
+
+       for_each_cmsghdr(cmsg, msg) {
+               if (!CMSG_OK(msg, cmsg))
+                       return -EINVAL;
+
+               len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+               _debug("CMSG %d, %d, %d",
+                      cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+               if (cmsg->cmsg_level != SOL_RXRPC)
+                       continue;
+
+               switch (cmsg->cmsg_type) {
+               case RXRPC_USER_CALL_ID:
+                       if (msg->msg_flags & MSG_CMSG_COMPAT) {
+                               if (len != sizeof(u32))
+                                       return -EINVAL;
+                               *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+                       } else {
+                               if (len != sizeof(unsigned long))
+                                       return -EINVAL;
+                               *user_call_ID = *(unsigned long *)
+                                       CMSG_DATA(cmsg);
+                       }
+                       _debug("User Call ID %lx", *user_call_ID);
+                       got_user_ID = true;
+                       break;
+
+               case RXRPC_ABORT:
+                       if (*command != RXRPC_CMD_SEND_DATA)
+                               return -EINVAL;
+                       *command = RXRPC_CMD_SEND_ABORT;
+                       if (len != sizeof(*abort_code))
+                               return -EINVAL;
+                       *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+                       _debug("Abort %x", *abort_code);
+                       if (*abort_code == 0)
+                               return -EINVAL;
+                       break;
+
+               case RXRPC_ACCEPT:
+                       if (*command != RXRPC_CMD_SEND_DATA)
+                               return -EINVAL;
+                       *command = RXRPC_CMD_ACCEPT;
+                       if (len != 0)
+                               return -EINVAL;
+                       break;
+
+               case RXRPC_EXCLUSIVE_CALL:
+                       *_exclusive = true;
+                       if (len != 0)
+                               return -EINVAL;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       }
+
+       if (!got_user_ID)
+               return -EINVAL;
+       _leave(" = 0");
+       return 0;
+}
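
For reference, a hedged userspace sketch of building the control buffer this
parser consumes.  The fallback values for SOL_RXRPC and RXRPC_USER_CALL_ID are
assumptions; verify them against the AF_RXRPC headers you build with.

#include <string.h>
#include <sys/socket.h>

#ifndef SOL_RXRPC
#define SOL_RXRPC          272    /* assumed; see linux/socket.h */
#endif
#ifndef RXRPC_USER_CALL_ID
#define RXRPC_USER_CALL_ID 1      /* assumed; see the AF_RXRPC API */
#endif

/* Attach the mandatory user call ID to a sendmsg() control buffer; ctrl
 * must be at least CMSG_SPACE(sizeof(unsigned long)) bytes.
 */
static void set_user_call_id(struct msghdr *msg, void *ctrl, size_t ctrl_len,
                             unsigned long call_id)
{
    struct cmsghdr *cmsg;

    msg->msg_control = ctrl;
    msg->msg_controllen = ctrl_len;
    cmsg = CMSG_FIRSTHDR(msg);
    cmsg->cmsg_level = SOL_RXRPC;
    cmsg->cmsg_type = RXRPC_USER_CALL_ID;
    cmsg->cmsg_len = CMSG_LEN(sizeof(call_id));
    memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));
    msg->msg_controllen = cmsg->cmsg_len;
}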
+
+/*
+ * Create a new client call for sendmsg().
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+                                 unsigned long user_call_ID, bool exclusive)
+{
+       struct rxrpc_conn_parameters cp;
+       struct rxrpc_call *call;
+       struct key *key;
+
+       DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+       _enter("");
+
+       if (!msg->msg_name)
+               return ERR_PTR(-EDESTADDRREQ);
+
+       key = rx->key;
+       if (key && !rx->key->payload.data[0])
+               key = NULL;
+
+       memset(&cp, 0, sizeof(cp));
+       cp.local                = rx->local;
+       cp.key                  = rx->key;
+       cp.security_level       = rx->min_sec_level;
+       cp.exclusive            = rx->exclusive | exclusive;
+       cp.service_id           = srx->srx_service;
+       call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+
+       _leave(" = %p\n", call);
+       return call;
+}
+
+/*
+ * send a message forming part of a call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+{
+       enum rxrpc_command cmd;
+       struct rxrpc_call *call;
+       unsigned long user_call_ID = 0;
+       bool exclusive = false;
+       u32 abort_code = 0;
+       int ret;
+
+       _enter("");
+
+       ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
+                                &exclusive);
+       if (ret < 0)
+               return ret;
+
+       if (cmd == RXRPC_CMD_ACCEPT) {
+               if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+                       return -EINVAL;
+               call = rxrpc_accept_call(rx, user_call_ID, NULL);
+               if (IS_ERR(call))
+                       return PTR_ERR(call);
+               rxrpc_put_call(call, rxrpc_call_put);
+               return 0;
+       }
+
+       call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+       if (!call) {
+               if (cmd != RXRPC_CMD_SEND_DATA)
+                       return -EBADSLT;
+               call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
+                                                        exclusive);
+               if (IS_ERR(call))
+                       return PTR_ERR(call);
+       }
+
+       _debug("CALL %d USR %lx ST %d on CONN %p",
+              call->debug_id, call->user_call_ID, call->state, call->conn);
+
+       if (call->state >= RXRPC_CALL_COMPLETE) {
+               /* it's too late for this call */
+               ret = -ESHUTDOWN;
+       } else if (cmd == RXRPC_CMD_SEND_ABORT) {
+               ret = 0;
+               if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+                       ret = rxrpc_send_call_packet(call,
+                                                    RXRPC_PACKET_TYPE_ABORT);
+       } else if (cmd != RXRPC_CMD_SEND_DATA) {
+               ret = -EINVAL;
+       } else if (rxrpc_is_client_call(call) &&
+                  call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+               /* request phase complete for this client call */
+               ret = -EPROTO;
+       } else if (rxrpc_is_service_call(call) &&
+                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+               /* Reply phase not begun or not complete for service call. */
+               ret = -EPROTO;
+       } else {
+               ret = rxrpc_send_data(rx, call, msg, len);
+       }
+
+       rxrpc_put_call(call, rxrpc_call_put);
+       _leave(" = %d", ret);
+       return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @sock: The socket the call is on
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call.  The call must be in a state
+ * appropriate to sending data.  No control data should be supplied in @msg,
+ * nor should an address be supplied.  MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+                          struct msghdr *msg, size_t len)
+{
+       int ret;
+
+       _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+       ASSERTCMP(msg->msg_name, ==, NULL);
+       ASSERTCMP(msg->msg_control, ==, NULL);
+
+       lock_sock(sock->sk);
+
+       _debug("CALL %d USR %lx ST %d on CONN %p",
+              call->debug_id, call->user_call_ID, call->state, call->conn);
+
+       if (call->state >= RXRPC_CALL_COMPLETE) {
+               ret = -ESHUTDOWN; /* it's too late for this call */
+       } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+               ret = -EPROTO; /* request phase complete for this client call */
+       } else {
+               ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+       }
+
+       release_sock(sock->sk);
+       _leave(" = %d", ret);
+       return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @sock: The socket the call is on
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ * @error: Local error value
+ * @why: 3-char string indicating why the call is being aborted.
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+                            u32 abort_code, int error, const char *why)
+{
+       _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+
+       lock_sock(sock->sk);
+
+       if (rxrpc_abort_call(why, call, 0, abort_code, error))
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+
+       release_sock(sock->sk);
+       _leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
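
Putting the two exported calls together, a hypothetical in-kernel sender; the
function name, the abort code of 1 and the "EXA" tag are invented, and the
iov_iter setup mirrors the receive side above rather than any specific caller.

/* Hypothetical: send one final data blob, aborting the call on failure. */
static int example_send_request(struct socket *sock, struct rxrpc_call *call,
                                void *data, size_t len)
{
    struct msghdr msg = { .msg_flags = 0 };  /* no MSG_MORE: ends Tx phase */
    struct kvec iov = { .iov_base = data, .iov_len = len };
    int ret;

    iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE, &iov, 1, len);

    ret = rxrpc_kernel_send_data(sock, call, &msg, len);
    if (ret < 0)
        rxrpc_kernel_abort_call(sock, call, 1 /* invented abort code */,
                                ECONNABORTED, "EXA");
    return ret;
}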
index 06c51d4..620d9cc 100644 (file)
 #include "ar-internal.h"
 
 /*
- * set up for the ACK at the end of the receive phase when we discard the final
- * receive phase data packet
- * - called with softirqs disabled
+ * Note the existence of a new-to-us socket buffer (allocated or dequeued).
  */
-static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+void rxrpc_new_skb(struct sk_buff *skb)
 {
-       /* the call may be aborted before we have a chance to ACK it */
-       write_lock(&call->state_lock);
-
-       switch (call->state) {
-       case RXRPC_CALL_CLIENT_RECV_REPLY:
-               call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
-               _debug("request final ACK");
-
-               /* get an extra ref on the call for the final-ACK generator to
-                * release */
-               rxrpc_get_call(call);
-               set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
-               if (try_to_del_timer_sync(&call->ack_timer) >= 0)
-                       rxrpc_queue_call(call);
-               break;
-
-       case RXRPC_CALL_SERVER_RECV_REQUEST:
-               call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
-       default:
-               break;
-       }
-
-       write_unlock(&call->state_lock);
+       const void *here = __builtin_return_address(0);
+       int n = atomic_inc_return(&rxrpc_n_skbs);
+       trace_rxrpc_skb(skb, 0, atomic_read(&skb->users), n, here);
 }
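
Each of these trackers records __builtin_return_address(0) so the trace shows
who touched the skb.  A standalone illustration of the builtin, which reports
the immediate caller's return address:

#include <stdio.h>

static void __attribute__((noinline)) show_caller(void)
{
    /* Argument 0 asks for the address this call will return to. */
    printf("called from %p\n", __builtin_return_address(0));
}

int main(void)
{
    show_caller();
    return 0;
}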
 
 /*
- * drop the bottom ACK off of the call ACK window and advance the window
+ * Note the re-emergence of a socket buffer from a queue or buffer.
  */
-static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
-                               struct rxrpc_skb_priv *sp)
+void rxrpc_see_skb(struct sk_buff *skb)
 {
-       int loop;
-       u32 seq;
-
-       spin_lock_bh(&call->lock);
-
-       _debug("hard ACK #%u", sp->hdr.seq);
-
-       for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
-               call->ackr_window[loop] >>= 1;
-               call->ackr_window[loop] |=
-                       call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
+       const void *here = __builtin_return_address(0);
+       if (skb) {
+               int n = atomic_read(&rxrpc_n_skbs);
+               trace_rxrpc_skb(skb, 1, atomic_read(&skb->users), n, here);
        }
-
-       seq = sp->hdr.seq;
-       ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
-       call->rx_data_eaten = seq;
-
-       if (call->ackr_win_top < UINT_MAX)
-               call->ackr_win_top++;
-
-       ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
-                   call->rx_data_post, >=, call->rx_data_recv);
-       ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
-                   call->rx_data_recv, >=, call->rx_data_eaten);
-
-       if (sp->hdr.flags & RXRPC_LAST_PACKET) {
-               rxrpc_request_final_ACK(call);
-       } else if (atomic_dec_and_test(&call->ackr_not_idle) &&
-                  test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
-               /* We previously soft-ACK'd some received packets that have now
-                * been consumed, so send a hard-ACK if no more packets are
-                * immediately forthcoming to allow the transmitter to free up
-                * its Tx bufferage.
-                */
-               _debug("send Rx idle ACK");
-               __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
-                                   false);
-       }
-
-       spin_unlock_bh(&call->lock);
 }
 
-/**
- * rxrpc_kernel_data_consumed - Record consumption of data message
- * @call: The call to which the message pertains.
- * @skb: Message holding data
- *
- * Record the consumption of a data message and generate an ACK if appropriate.
- * The call state is shifted if this was the final packet.  The caller must be
- * in process context with no spinlocks held.
- *
- * TODO: Actually generate the ACK here rather than punting this to the
- * workqueue.
+/*
+ * Note the addition of a ref on a socket buffer.
  */
-void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+void rxrpc_get_skb(struct sk_buff *skb)
 {
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-       _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
-
-       ASSERTCMP(sp->call, ==, call);
-       ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
-
-       /* TODO: Fix the sequence number tracking */
-       ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-       ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-       ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-
-       call->rx_data_recv = sp->hdr.seq;
-       rxrpc_hard_ACK_data(call, sp);
+       const void *here = __builtin_return_address(0);
+       int n = atomic_inc_return(&rxrpc_n_skbs);
+       trace_rxrpc_skb(skb, 2, atomic_read(&skb->users), n, here);
+       skb_get(skb);
 }
-EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
 
 /*
- * Destroy a packet that has an RxRPC control buffer
+ * Note the destruction of a socket buffer.
  */
-void rxrpc_packet_destructor(struct sk_buff *skb)
+void rxrpc_free_skb(struct sk_buff *skb)
 {
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct rxrpc_call *call = sp->call;
-
-       _enter("%p{%p}", skb, call);
-
-       if (call) {
-               if (atomic_dec_return(&call->skb_count) < 0)
-                       BUG();
-               rxrpc_put_call(call);
-               sp->call = NULL;
+       const void *here = __builtin_return_address(0);
+       if (skb) {
+               int n;
+               CHECK_SLAB_OKAY(&skb->users);
+               n = atomic_dec_return(&rxrpc_n_skbs);
+               trace_rxrpc_skb(skb, 3, atomic_read(&skb->users), n, here);
+               kfree_skb(skb);
        }
-
-       if (skb->sk)
-               sock_rfree(skb);
-       _leave("");
 }
 
-/**
- * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
- * @skb: The socket buffer to be freed
- *
- * Let RxRPC free its own socket buffer, permitting it to maintain debug
- * accounting.
+/*
+ * Clear a queue of socket buffers.
  */
-void rxrpc_kernel_free_skb(struct sk_buff *skb)
+void rxrpc_purge_queue(struct sk_buff_head *list)
 {
-       rxrpc_free_skb(skb);
+       const void *here = __builtin_return_address(0);
+       struct sk_buff *skb;
+       while ((skb = skb_dequeue(list)) != NULL) {
+               int n = atomic_dec_return(&rxrpc_n_skbs);
+               trace_rxrpc_skb(skb, 4, atomic_read(&skb->users), n, here);
+               kfree_skb(skb);
+       }
 }
-EXPORT_SYMBOL(rxrpc_kernel_free_skb);
index 03ad087..a03c61c 100644 (file)
@@ -20,7 +20,7 @@ static const unsigned int one = 1;
 static const unsigned int four = 4;
 static const unsigned int thirtytwo = 32;
 static const unsigned int n_65535 = 65535;
-static const unsigned int n_max_acks = RXRPC_MAXACKS;
+static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
 
 /*
  * RxRPC operating parameters.
@@ -62,6 +62,22 @@ static struct ctl_table rxrpc_sysctl_table[] = {
                .proc_handler   = proc_dointvec_ms_jiffies,
                .extra1         = (void *)&one,
        },
+       {
+               .procname       = "idle_conn_expiry",
+               .data           = &rxrpc_conn_idle_client_expiry,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_ms_jiffies,
+               .extra1         = (void *)&one,
+       },
+       {
+               .procname       = "idle_conn_fast_expiry",
+               .data           = &rxrpc_conn_idle_client_fast_expiry,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_ms_jiffies,
+               .extra1         = (void *)&one,
+       },
 
        /* Values measured in seconds but used in jiffies */
        {
@@ -72,26 +88,25 @@ static struct ctl_table rxrpc_sysctl_table[] = {
                .proc_handler   = proc_dointvec_jiffies,
                .extra1         = (void *)&one,
        },
+
+       /* Non-time values */
        {
-               .procname       = "dead_call_expiry",
-               .data           = &rxrpc_dead_call_expiry,
+               .procname       = "max_client_conns",
+               .data           = &rxrpc_max_client_connections,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_jiffies,
-               .extra1         = (void *)&one,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = (void *)&rxrpc_reap_client_connections,
        },
-
-       /* Values measured in seconds */
        {
-               .procname       = "connection_expiry",
-               .data           = &rxrpc_connection_expiry,
+               .procname       = "reap_client_conns",
+               .data           = &rxrpc_reap_client_connections,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = (void *)&one,
+               .extra2         = (void *)&rxrpc_max_client_connections,
        },
-
-       /* Non-time values */
        {
                .procname       = "max_backlog",
                .data           = &rxrpc_max_backlog,
index b88914d..ff7af71 100644 (file)
@@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
                srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
                return 0;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
        case ETH_P_IPV6:
                srx->transport_type = SOCK_DGRAM;
                srx->transport_len = sizeof(srx->transport.sin6);
@@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
                srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
                srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
                return 0;
+#endif
 
        default:
                pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
index ccf931b..7795d5a 100644 (file)
@@ -749,6 +749,17 @@ config NET_ACT_CONNMARK
          To compile this code as a module, choose M here: the
          module will be called act_connmark.
 
+config NET_ACT_SKBMOD
+        tristate "skb data modification action"
+        depends on NET_CLS_ACT
+        ---help---
+         Say Y here to allow modification of skb data, e.g. the ethernet header.
+
+         If unsure, say N.
+
+         To compile this code as a module, choose M here: the
+         module will be called act_skbmod.
+
 config NET_ACT_IFE
         tristate "Inter-FE action based on IETF ForCES InterFE LFB"
         depends on NET_CLS_ACT
@@ -761,6 +772,17 @@ config NET_ACT_IFE
          To compile this code as a module, choose M here: the
          module will be called act_ife.
 
+config NET_ACT_TUNNEL_KEY
+        tristate "IP tunnel metadata manipulation"
+        depends on NET_CLS_ACT
+        ---help---
+         Say Y here to set/release IP tunnel metadata.
+
+         If unsure, say N.
+
+         To compile this code as a module, choose M here: the
+         module will be called act_tunnel_key.
+
 config NET_IFE_SKBMARK
         tristate "Support to encoding decoding skb mark on IFE action"
         depends on NET_ACT_IFE
index ae088a5..148ae0d 100644 (file)
@@ -19,9 +19,11 @@ obj-$(CONFIG_NET_ACT_CSUM)   += act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)     += act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)      += act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD)   += act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)      += act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)  += act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)  += act_meta_skbprio.o
+obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_SCH_FIFO)     += sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)      += sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)      += sch_htb.o
index bfa8707..1d39600 100644 (file)
@@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops;
 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
                   struct tcf_result *res)
 {
+       bool at_ingress = skb_at_tc_ingress(skb);
        struct tcf_bpf *prog = to_bpf(act);
        struct bpf_prog *filter;
        int action, filter_res;
-       bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
-
-       if (unlikely(!skb_mac_header_was_set(skb)))
-               return TC_ACT_UNSPEC;
 
        tcf_lastuse_update(&prog->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
index 141a06e..e87cd81 100644 (file)
@@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
        u32 *tlv = (u32 *)(skbdata);
        u16 totlen = nla_total_size(dlen);      /*alignment + hdr */
        char *dptr = (char *)tlv + NLA_HDRLEN;
-       u32 htlv = attrtype << 16 | totlen;
+       u32 htlv = attrtype << 16 | dlen;
 
        *tlv = htonl(htlv);
        memset(dptr, 0, totlen - NLA_HDRLEN);
@@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ife_release_meta_gen);
 
 int ife_validate_meta_u32(void *val, int len)
 {
-       if (len == 4)
+       if (len == sizeof(u32))
                return 0;
 
        return -EINVAL;
@@ -144,8 +144,8 @@ EXPORT_SYMBOL_GPL(ife_validate_meta_u32);
 
 int ife_validate_meta_u16(void *val, int len)
 {
-       /* length will include padding */
-       if (len == NLA_ALIGN(2))
+       /* length will not include padding */
+       if (len == sizeof(u16))
                return 0;
 
        return -EINVAL;
@@ -652,12 +652,14 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
                u8 *tlvdata = (u8 *)tlv;
                u16 mtype = tlv->type;
                u16 mlen = tlv->len;
+               u16 alen;
 
                mtype = ntohs(mtype);
                mlen = ntohs(mlen);
+               alen = NLA_ALIGN(mlen);
 
-               if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
-                                      (void *)(tlvdata + 4))) {
+               if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
+                                      (void *)(tlvdata + NLA_HDRLEN))) {
                        /* abuse overlimits to count when we receive metadata
                         * but don't have an ops for it
                         */
@@ -666,8 +668,8 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
                        ife->tcf_qstats.overlimits++;
                }
 
-               tlvdata += mlen;
-               ifehdrln -= mlen;
+               tlvdata += alen;
+               ifehdrln -= alen;
                tlv = (struct meta_tlvhdr *)tlvdata;
        }
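
A standalone check of the arithmetic behind this fix: the TLV length field now
carries header plus data without padding, so the walker advances by the
NLA_ALIGN()ed length while handing only mlen - NLA_HDRLEN value bytes to the
decode ops.  The macros are local copies assumed to mirror linux/netlink.h;
the metadatum is invented.

#include <stdio.h>
#include <stdint.h>

struct demo_nlattr {
    uint16_t nla_len;
    uint16_t nla_type;
};

/* Local copies, assumed to mirror <linux/netlink.h>. */
#define NLA_ALIGNTO    4
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN     ((int)NLA_ALIGN(sizeof(struct demo_nlattr)))

int main(void)
{
    uint16_t mlen = NLA_HDRLEN + 2;   /* invented u16 metadatum TLV */

    printf("value bytes = %d, advance by = %d\n",
           mlen - NLA_HDRLEN, NLA_ALIGN(mlen));   /* 2 and 8 */
    return 0;
}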
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644 (file)
index 0000000..e7d9638
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * net/sched/act_skbmod.c  skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK     15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+                         struct tcf_result *res)
+{
+       struct tcf_skbmod *d = to_skbmod(a);
+       int action;
+       struct tcf_skbmod_params *p;
+       u64 flags;
+       int err;
+
+       tcf_lastuse_update(&d->tcf_tm);
+       bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+       /* XXX: if you are going to edit fields beyond the ethernet header
+        * (for example, IP header replacement or VLAN swap), then
+        * MAX_EDIT_LEN needs to change accordingly.
+        */
+       err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+       if (unlikely(err)) { /* best policy is to drop on the floor */
+               qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+               return TC_ACT_SHOT;
+       }
+
+       rcu_read_lock();
+       action = READ_ONCE(d->tcf_action);
+       if (unlikely(action == TC_ACT_SHOT)) {
+               qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+               rcu_read_unlock();
+               return action;
+       }
+
+       p = rcu_dereference(d->skbmod_p);
+       flags = p->flags;
+       if (flags & SKBMOD_F_DMAC)
+               ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+       if (flags & SKBMOD_F_SMAC)
+               ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+       if (flags & SKBMOD_F_ETYPE)
+               eth_hdr(skb)->h_proto = p->eth_type;
+       rcu_read_unlock();
+
+       if (flags & SKBMOD_F_SWAPMAC) {
+               u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+               /* XXX: there is surely a more efficient way to do this swap */
+               ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+               ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+               ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+       }
+
+       return action;
+}
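
The u16 temporary in the swap above exists to satisfy the 2-byte alignment
that ether_addr_copy() requires of its arguments.  A standalone rendering of
the same three-copy swap, with invented addresses:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define ETH_ALEN 6

int main(void)
{
    /* Invented MAC addresses for illustration. */
    uint8_t h_dest[ETH_ALEN]   = { 0x02, 0, 0, 0, 0, 0x01 };
    uint8_t h_source[ETH_ALEN] = { 0x02, 0, 0, 0, 0, 0x02 };
    uint16_t tmp[ETH_ALEN / 2];     /* 2-byte-aligned scratch buffer */

    memcpy(tmp, h_dest, ETH_ALEN);
    memcpy(h_dest, h_source, ETH_ALEN);
    memcpy(h_source, tmp, ETH_ALEN);
    printf("dest now ends %02x, source now ends %02x\n",
           h_dest[5], h_source[5]);  /* 02 and 01 */
    return 0;
}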
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+       [TCA_SKBMOD_PARMS]              = { .len = sizeof(struct tc_skbmod) },
+       [TCA_SKBMOD_DMAC]               = { .len = ETH_ALEN },
+       [TCA_SKBMOD_SMAC]               = { .len = ETH_ALEN },
+       [TCA_SKBMOD_ETYPE]              = { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+                          struct nlattr *est, struct tc_action **a,
+                          int ovr, int bind)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+       struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+       struct tcf_skbmod_params *p, *p_old;
+       struct tc_skbmod *parm;
+       struct tcf_skbmod *d;
+       bool exists = false;
+       u8 *daddr = NULL;
+       u8 *saddr = NULL;
+       u16 eth_type = 0;
+       u32 lflags = 0;
+       int ret = 0, err;
+
+       if (!nla)
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_SKBMOD_PARMS])
+               return -EINVAL;
+
+       if (tb[TCA_SKBMOD_DMAC]) {
+               daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+               lflags |= SKBMOD_F_DMAC;
+       }
+
+       if (tb[TCA_SKBMOD_SMAC]) {
+               saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+               lflags |= SKBMOD_F_SMAC;
+       }
+
+       if (tb[TCA_SKBMOD_ETYPE]) {
+               eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+               lflags |= SKBMOD_F_ETYPE;
+       }
+
+       parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+       if (parm->flags & SKBMOD_F_SWAPMAC)
+               lflags = SKBMOD_F_SWAPMAC;
+
+       exists = tcf_hash_check(tn, parm->index, a, bind);
+       if (exists && bind)
+               return 0;
+
+       if (!lflags)
+               return -EINVAL;
+
+       if (!exists) {
+               ret = tcf_hash_create(tn, parm->index, est, a,
+                                     &act_skbmod_ops, bind, true);
+               if (ret)
+                       return ret;
+
+               ret = ACT_P_CREATED;
+       } else {
+               tcf_hash_release(*a, bind);
+               if (!ovr)
+                       return -EEXIST;
+       }
+
+       d = to_skbmod(*a);
+
+       ASSERT_RTNL();
+       p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+       if (unlikely(!p)) {
+               if (ovr)
+                       tcf_hash_release(*a, bind);
+               return -ENOMEM;
+       }
+
+       p->flags = lflags;
+       d->tcf_action = parm->action;
+
+       p_old = rtnl_dereference(d->skbmod_p);
+
+       if (ovr)
+               spin_lock_bh(&d->tcf_lock);
+
+       if (lflags & SKBMOD_F_DMAC)
+               ether_addr_copy(p->eth_dst, daddr);
+       if (lflags & SKBMOD_F_SMAC)
+               ether_addr_copy(p->eth_src, saddr);
+       if (lflags & SKBMOD_F_ETYPE)
+               p->eth_type = htons(eth_type);
+
+       rcu_assign_pointer(d->skbmod_p, p);
+       if (ovr)
+               spin_unlock_bh(&d->tcf_lock);
+
+       if (p_old)
+               kfree_rcu(p_old, rcu);
+
+       if (ret == ACT_P_CREATED)
+               tcf_hash_insert(tn, *a);
+       return ret;
+}
+
+static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+{
+       struct tcf_skbmod *d = to_skbmod(a);
+       struct tcf_skbmod_params  *p;
+
+       p = rcu_dereference_protected(d->skbmod_p, 1);
+       kfree_rcu(p, rcu);
+}
+
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+                          int bind, int ref)
+{
+       struct tcf_skbmod *d = to_skbmod(a);
+       unsigned char *b = skb_tail_pointer(skb);
+       struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
+       struct tc_skbmod opt = {
+               .index   = d->tcf_index,
+               .refcnt  = d->tcf_refcnt - ref,
+               .bindcnt = d->tcf_bindcnt - bind,
+               .action  = d->tcf_action,
+       };
+       struct tcf_t t;
+
+       opt.flags  = p->flags;
+       if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+               goto nla_put_failure;
+       if ((p->flags & SKBMOD_F_DMAC) &&
+           nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+               goto nla_put_failure;
+       if ((p->flags & SKBMOD_F_SMAC) &&
+           nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+               goto nla_put_failure;
+       if ((p->flags & SKBMOD_F_ETYPE) &&
+           nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+               goto nla_put_failure;
+
+       tcf_tm_dump(&t, &d->tcf_tm);
+       if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+               goto nla_put_failure;
+
+       return skb->len;
+nla_put_failure:
+       nlmsg_trim(skb, b);
+       return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+                            struct netlink_callback *cb, int type,
+                            const struct tc_action_ops *ops)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+       return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+       return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+       .kind           =       "skbmod",
+       .type           =       TCA_ACT_SKBMOD,
+       .owner          =       THIS_MODULE,
+       .act            =       tcf_skbmod_run,
+       .dump           =       tcf_skbmod_dump,
+       .init           =       tcf_skbmod_init,
+       .cleanup        =       tcf_skbmod_cleanup,
+       .walk           =       tcf_skbmod_walker,
+       .lookup         =       tcf_skbmod_search,
+       .size           =       sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+       return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+       struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+       tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+       .init = skbmod_init_net,
+       .exit = skbmod_exit_net,
+       .id   = &skbmod_net_id,
+       .size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+       return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+       tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
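
The skbmod action above is a compact instance of the tc-action RCU parameter
pattern: the fast path reads the parameter block under rcu_read_lock() and
rcu_dereference(), while the RTNL-serialized control path builds a fresh block,
publishes it with rcu_assign_pointer() and frees the old one with kfree_rcu().
A minimal sketch of that discipline, assuming kernel context; my_obj and
my_params are illustrative names, not kernel APIs:

/* Sketch only: the read/update pattern used by act_skbmod above. */
struct my_params {
	u64 flags;
	struct rcu_head rcu;
};

struct my_obj {
	struct my_params __rcu *p;
};

/* datapath: may run concurrently with updates */
static u64 my_read(struct my_obj *o)
{
	struct my_params *p;
	u64 flags;

	rcu_read_lock();
	p = rcu_dereference(o->p);	/* valid until rcu_read_unlock() */
	flags = p->flags;
	rcu_read_unlock();
	return flags;
}

/* control path: serialized by RTNL, hence rtnl_dereference() */
static int my_update(struct my_obj *o, u64 flags)
{
	struct my_params *newp, *oldp;

	newp = kzalloc(sizeof(*newp), GFP_KERNEL);
	if (!newp)
		return -ENOMEM;
	newp->flags = flags;

	oldp = rtnl_dereference(o->p);
	rcu_assign_pointer(o->p, newp);	/* readers now see newp */
	if (oldp)
		kfree_rcu(oldp, rcu);	/* freed after all readers drain */
	return 0;
}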
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
new file mode 100644 (file)
index 0000000..af47bdf
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_tunnel_key.h>
+
+#define TUNNEL_KEY_TAB_MASK     15
+
+static int tunnel_key_net_id;
+static struct tc_action_ops act_tunnel_key_ops;
+
+static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
+                         struct tcf_result *res)
+{
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params;
+       int action;
+
+       rcu_read_lock();
+
+       params = rcu_dereference(t->params);
+
+       tcf_lastuse_update(&t->tcf_tm);
+       bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+       action = params->action;
+
+       switch (params->tcft_action) {
+       case TCA_TUNNEL_KEY_ACT_RELEASE:
+               skb_dst_drop(skb);
+               break;
+       case TCA_TUNNEL_KEY_ACT_SET:
+               skb_dst_drop(skb);
+               skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
+               break;
+       default:
+               WARN_ONCE(1, "Bad tunnel_key action %d.\n",
+                         params->tcft_action);
+               break;
+       }
+
+       rcu_read_unlock();
+
+       return action;
+}
+
+static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
+       [TCA_TUNNEL_KEY_PARMS]      = { .len = sizeof(struct tc_tunnel_key) },
+       [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
+       [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
+       [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+       [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+       [TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
+};
+
+static int tunnel_key_init(struct net *net, struct nlattr *nla,
+                          struct nlattr *est, struct tc_action **a,
+                          int ovr, int bind)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+       struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
+       struct tcf_tunnel_key_params *params_old;
+       struct tcf_tunnel_key_params *params_new;
+       struct metadata_dst *metadata = NULL;
+       struct tc_tunnel_key *parm;
+       struct tcf_tunnel_key *t;
+       bool exists = false;
+       __be64 key_id;
+       int ret = 0;
+       int err;
+
+       if (!nla)
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+       if (err < 0)
+               return err;
+
+       if (!tb[TCA_TUNNEL_KEY_PARMS])
+               return -EINVAL;
+
+       parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+       exists = tcf_hash_check(tn, parm->index, a, bind);
+       if (exists && bind)
+               return 0;
+
+       switch (parm->t_action) {
+       case TCA_TUNNEL_KEY_ACT_RELEASE:
+               break;
+       case TCA_TUNNEL_KEY_ACT_SET:
+               if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+                       ret = -EINVAL;
+                       goto err_out;
+               }
+
+               key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+
+               if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
+                   tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
+                       __be32 saddr;
+                       __be32 daddr;
+
+                       saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
+                       daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
+
+                       metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+                                                   TUNNEL_KEY, key_id, 0);
+               } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
+                          tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
+                       struct in6_addr saddr;
+                       struct in6_addr daddr;
+
+                       saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
+                       daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
+
+                       metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
+                                                     TUNNEL_KEY, key_id, 0);
+               }
+
+               if (!metadata) {
+                       ret = -EINVAL;
+                       goto err_out;
+               }
+
+               metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
+               break;
+       default:
+               ret = -EINVAL;
+               goto err_out;
+       }
+
+       if (!exists) {
+               ret = tcf_hash_create(tn, parm->index, est, a,
+                                     &act_tunnel_key_ops, bind, true);
+               if (ret)
+                       return ret;
+
+               ret = ACT_P_CREATED;
+       } else {
+               tcf_hash_release(*a, bind);
+               if (!ovr)
+                       return -EEXIST;
+       }
+
+       t = to_tunnel_key(*a);
+
+       ASSERT_RTNL();
+       params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+       if (unlikely(!params_new)) {
+               if (ret == ACT_P_CREATED)
+                       tcf_hash_release(*a, bind);
+               return -ENOMEM;
+       }
+
+       params_old = rtnl_dereference(t->params);
+
+       params_new->action = parm->action;
+       params_new->tcft_action = parm->t_action;
+       params_new->tcft_enc_metadata = metadata;
+
+       rcu_assign_pointer(t->params, params_new);
+
+       if (params_old)
+               kfree_rcu(params_old, rcu);
+
+       if (ret == ACT_P_CREATED)
+               tcf_hash_insert(tn, *a);
+
+       return ret;
+
+err_out:
+       if (exists)
+               tcf_hash_release(*a, bind);
+       return ret;
+}
+
+static void tunnel_key_release(struct tc_action *a, int bind)
+{
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params;
+
+       params = rcu_dereference_protected(t->params, 1);
+
+       if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+               dst_release(&params->tcft_enc_metadata->dst);
+
+       kfree_rcu(params, rcu);
+}
+
+static int tunnel_key_dump_addresses(struct sk_buff *skb,
+                                    const struct ip_tunnel_info *info)
+{
+       unsigned short family = ip_tunnel_info_af(info);
+
+       if (family == AF_INET) {
+               __be32 saddr = info->key.u.ipv4.src;
+               __be32 daddr = info->key.u.ipv4.dst;
+
+               if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) &&
+                   !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr))
+                       return 0;
+       }
+
+       if (family == AF_INET6) {
+               const struct in6_addr *saddr6 = &info->key.u.ipv6.src;
+               const struct in6_addr *daddr6 = &info->key.u.ipv6.dst;
+
+               if (!nla_put_in6_addr(skb,
+                                     TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) &&
+                   !nla_put_in6_addr(skb,
+                                     TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6))
+                       return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
+                          int bind, int ref)
+{
+       unsigned char *b = skb_tail_pointer(skb);
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params;
+       struct tc_tunnel_key opt = {
+               .index    = t->tcf_index,
+               .refcnt   = t->tcf_refcnt - ref,
+               .bindcnt  = t->tcf_bindcnt - bind,
+       };
+       struct tcf_t tm;
+
+       params = rtnl_dereference(t->params);
+
+       opt.t_action = params->tcft_action;
+       opt.action = params->action;
+
+       if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
+               goto nla_put_failure;
+
+       if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+               struct ip_tunnel_key *key =
+                       &params->tcft_enc_metadata->u.tun_info.key;
+               __be32 key_id = tunnel_id_to_key32(key->tun_id);
+
+               if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+                   tunnel_key_dump_addresses(skb,
+                                             &params->tcft_enc_metadata->u.tun_info))
+                       goto nla_put_failure;
+       }
+
+       tcf_tm_dump(&tm, &t->tcf_tm);
+       if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
+                         &tm, TCA_TUNNEL_KEY_PAD))
+               goto nla_put_failure;
+
+       return skb->len;
+
+nla_put_failure:
+       nlmsg_trim(skb, b);
+       return -1;
+}
+
+static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
+                            struct netlink_callback *cb, int type,
+                            const struct tc_action_ops *ops)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+       return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+       return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_tunnel_key_ops = {
+       .kind           =       "tunnel_key",
+       .type           =       TCA_ACT_TUNNEL_KEY,
+       .owner          =       THIS_MODULE,
+       .act            =       tunnel_key_act,
+       .dump           =       tunnel_key_dump,
+       .init           =       tunnel_key_init,
+       .cleanup        =       tunnel_key_release,
+       .walk           =       tunnel_key_walker,
+       .lookup         =       tunnel_key_search,
+       .size           =       sizeof(struct tcf_tunnel_key),
+};
+
+static __net_init int tunnel_key_init_net(struct net *net)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+       return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+}
+
+static void __net_exit tunnel_key_exit_net(struct net *net)
+{
+       struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+       tc_action_net_exit(tn);
+}
+
+static struct pernet_operations tunnel_key_net_ops = {
+       .init = tunnel_key_init_net,
+       .exit = tunnel_key_exit_net,
+       .id   = &tunnel_key_net_id,
+       .size = sizeof(struct tc_action_net),
+};
+
+static int __init tunnel_key_init_module(void)
+{
+       return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+static void __exit tunnel_key_cleanup_module(void)
+{
+       tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+module_init(tunnel_key_init_module);
+module_exit(tunnel_key_cleanup_module);
+
+MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
+MODULE_DESCRIPTION("ip tunnel manipulation actions");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 691409d..59a8d31 100644 (file)
@@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
                        goto drop;
                break;
        case TCA_VLAN_ACT_PUSH:
-               err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid);
+               err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
+                                   (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
                if (err)
                        goto drop;
                break;
@@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
        [TCA_VLAN_PARMS]                = { .len = sizeof(struct tc_vlan) },
        [TCA_VLAN_PUSH_VLAN_ID]         = { .type = NLA_U16 },
        [TCA_VLAN_PUSH_VLAN_PROTOCOL]   = { .type = NLA_U16 },
+       [TCA_VLAN_PUSH_VLAN_PRIORITY]   = { .type = NLA_U8 },
 };
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
@@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        int action;
        __be16 push_vid = 0;
        __be16 push_proto = 0;
+       u8 push_prio = 0;
        bool exists = false;
        int ret = 0, err;
 
@@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                } else {
                        push_proto = htons(ETH_P_8021Q);
                }
+
+               if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+                       push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
                break;
        default:
                if (exists)
@@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 
        v->tcfv_action = action;
        v->tcfv_push_vid = push_vid;
+       v->tcfv_push_prio = push_prio;
        v->tcfv_push_proto = push_proto;
 
        v->tcf_action = parm->action;
@@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
        if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
            (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
             nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
-                         v->tcfv_push_proto)))
+                         v->tcfv_push_proto) ||
+            (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
+                                             v->tcfv_push_prio))))
                goto nla_put_failure;
 
        tcf_tm_dump(&t, &v->tcf_tm);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0b8c3ac..eb219b7 100644 (file)
@@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
        struct tcf_exts e;
        struct tcf_ematch_tree t;
 
-       tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       if (err < 0)
+               goto errout;
 
        err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
        if (err < 0)
@@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
        if (!fnew)
                return -ENOBUFS;
 
-       tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+       err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+       if (err < 0)
+               goto errout;
+
        err = -EINVAL;
        if (handle) {
                fnew->handle = handle;
@@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 
        return 0;
 errout:
+       tcf_exts_destroy(&fnew->exts);
        kfree(fnew);
        return err;
 }
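
This cls_basic hunk and the matching hunks in the classifiers below (cls_bpf,
cls_cgroup, cls_flow, cls_flower, cls_fw, cls_route, cls_rsvp, cls_tcindex,
cls_u32) are all one conversion: tcf_exts_init() can now fail, since it
allocates storage for the action array, so every call site must check its
return value and pair a successful init with tcf_exts_destroy() on later
error paths. The converted shape, sketched in the abstract:

/* Sketch of the error-handling shape these hunks converge on. */
static int sketch_set_parms(struct net *net, struct tcf_proto *tp,
			    struct nlattr **tb, struct nlattr *est, bool ovr)
{
	struct tcf_exts e;
	int err;

	err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
	if (err < 0)		/* init itself can now return -ENOMEM */
		return err;
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
	if (err < 0)
		goto errout;

	/* ... apply the parsed parameters ... */
	return 0;

errout:
	tcf_exts_destroy(&e);	/* init succeeded, so destroy is owed */
	return err;
}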
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c3002c2..1d92d4d 100644 (file)
@@ -83,9 +83,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
        struct cls_bpf_prog *prog;
        int ret = -1;
 
-       if (unlikely(!skb_mac_header_was_set(skb)))
-               return -1;
-
        /* Needed here for accessing maps. */
        rcu_read_lock();
        list_for_each_entry_rcu(prog, &head->plist, link) {
@@ -311,17 +308,19 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;
 
-       tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-       ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+       ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
        if (ret < 0)
                return ret;
+       ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+       if (ret < 0)
+               goto errout;
 
        if (tb[TCA_BPF_FLAGS]) {
                u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
 
                if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
-                       tcf_exts_destroy(&exts);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto errout;
                }
 
                have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
@@ -331,10 +330,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 
        ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
                       cls_bpf_prog_from_efd(tb, prog, tp);
-       if (ret < 0) {
-               tcf_exts_destroy(&exts);
-               return ret;
-       }
+       if (ret < 0)
+               goto errout;
 
        if (tb[TCA_BPF_CLASSID]) {
                prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
@@ -343,6 +340,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 
        tcf_exts_change(tp, &prog->exts, &exts);
        return 0;
+
+errout:
+       tcf_exts_destroy(&exts);
+       return ret;
 }
 
 static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -388,7 +389,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
        if (!prog)
                return -ENOBUFS;
 
-       tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+       ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+       if (ret < 0)
+               goto errout;
 
        if (oldprog) {
                if (handle && oldprog->handle != handle) {
@@ -420,9 +423,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 
        *arg = (unsigned long) prog;
        return 0;
+
 errout:
+       tcf_exts_destroy(&prog->exts);
        kfree(prog);
-
        return ret;
 }
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 4c85bd3..85233c4 100644 (file)
@@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
        if (!new)
                return -ENOBUFS;
 
-       tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+       err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+       if (err < 0)
+               goto errout;
        new->handle = handle;
        new->tp = tp;
        err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
@@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
        if (err < 0)
                goto errout;
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       if (err < 0) {
+               tcf_exts_destroy(&e);
+               goto errout;
+       }
 
        err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
        if (err < 0) {
@@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
        return 0;
 errout:
+       tcf_exts_destroy(&new->exts);
        kfree(new);
        return err;
 }
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index fbfec6a..a379bae 100644 (file)
@@ -29,7 +29,7 @@
 #include <net/route.h>
 #include <net/flow_dissector.h>
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack.h>
 #endif
 
@@ -125,14 +125,14 @@ static u32 flow_get_mark(const struct sk_buff *skb)
 
 static u32 flow_get_nfct(const struct sk_buff *skb)
 {
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
        return addr_fold(skb->nfct);
 #else
        return 0;
 #endif
 }
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #define CTTUPLE(skb, member)                                           \
 ({                                                                     \
        enum ip_conntrack_info ctinfo;                                  \
@@ -418,10 +418,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
                        return -EOPNOTSUPP;
        }
 
-       tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+       err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+       if (err < 0)
+               goto err1;
        err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
        if (err < 0)
-               return err;
+               goto err1;
 
        err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
        if (err < 0)
@@ -432,13 +434,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
        if (!fnew)
                goto err2;
 
-       tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+       err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+       if (err < 0)
+               goto err3;
 
        fold = (struct flow_filter *)*arg;
        if (fold) {
                err = -EINVAL;
                if (fold->handle != handle && handle)
-                       goto err2;
+                       goto err3;
 
                /* Copy fold into fnew */
                fnew->tp = fold->tp;
@@ -458,31 +462,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
-                       goto err2;
+                       goto err3;
 
                if (mode == FLOW_MODE_HASH)
                        perturb_period = fold->perturb_period;
                if (tb[TCA_FLOW_PERTURB]) {
                        if (mode != FLOW_MODE_HASH)
-                               goto err2;
+                               goto err3;
                        perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
                }
        } else {
                err = -EINVAL;
                if (!handle)
-                       goto err2;
+                       goto err3;
                if (!tb[TCA_FLOW_KEYS])
-                       goto err2;
+                       goto err3;
 
                mode = FLOW_MODE_MAP;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
-                       goto err2;
+                       goto err3;
 
                if (tb[TCA_FLOW_PERTURB]) {
                        if (mode != FLOW_MODE_HASH)
-                               goto err2;
+                               goto err3;
                        perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
                }
 
@@ -542,6 +546,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
                call_rcu(&fold->rcu, flow_destroy_filter);
        return 0;
 
+err3:
+       tcf_exts_destroy(&fnew->exts);
 err2:
        tcf_em_tree_destroy(&t);
        kfree(fnew);
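
The conntrack guards in cls_flow move to IS_ENABLED(), which is true for both
built-in (=y) and modular (=m) configurations and so replaces the two-macro
test in one step. Sketch of the equivalence:

/* IS_ENABLED(CONFIG_FOO) is 1 when CONFIG_FOO or CONFIG_FOO_MODULE is
 * defined, so the old test
 *
 *   #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 *
 * becomes the single, equivalent guard:
 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	/* code that needs conntrack, built-in or module */
#endif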
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5060801..a3f4c70 100644 (file)
 #include <net/ip.h>
 #include <net/flow_dissector.h>
 
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
 struct fl_flow_key {
        int     indev_ifindex;
        struct flow_dissector_key_control control;
+       struct flow_dissector_key_control enc_control;
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_eth_addrs eth;
-       struct flow_dissector_key_addrs ipaddrs;
+       struct flow_dissector_key_vlan vlan;
        union {
                struct flow_dissector_key_ipv4_addrs ipv4;
                struct flow_dissector_key_ipv6_addrs ipv6;
        };
        struct flow_dissector_key_ports tp;
+       struct flow_dissector_key_keyid enc_key_id;
+       union {
+               struct flow_dissector_key_ipv4_addrs enc_ipv4;
+               struct flow_dissector_key_ipv6_addrs enc_ipv6;
+       };
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -123,11 +132,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
        struct cls_fl_filter *f;
        struct fl_flow_key skb_key;
        struct fl_flow_key skb_mkey;
+       struct ip_tunnel_info *info;
 
        if (!atomic_read(&head->ht.nelems))
                return -1;
 
        fl_clear_masked_range(&skb_key, &head->mask);
+
+       info = skb_tunnel_info(skb);
+       if (info) {
+               struct ip_tunnel_key *key = &info->key;
+
+               switch (ip_tunnel_info_af(info)) {
+               case AF_INET:
+                       skb_key.enc_ipv4.src = key->u.ipv4.src;
+                       skb_key.enc_ipv4.dst = key->u.ipv4.dst;
+                       break;
+               case AF_INET6:
+                       skb_key.enc_ipv6.src = key->u.ipv6.src;
+                       skb_key.enc_ipv6.dst = key->u.ipv6.dst;
+                       break;
+               }
+
+               skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+       }
+
        skb_key.indev_ifindex = skb->skb_iif;
        /* skb_flow_dissect() does not set n_proto in case of an unknown
         * protocol, so do it here instead.
@@ -293,6 +322,22 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_DST]        = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_VLAN_ID]        = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_VLAN_PRIO]      = { .type = NLA_U8 },
+       [TCA_FLOWER_KEY_VLAN_ETH_TYPE]  = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_KEY_ID]     = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_ENC_IPV4_SRC]   = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_ENC_IPV4_DST]   = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
+       [TCA_FLOWER_KEY_ENC_IPV6_SRC]   = { .len = sizeof(struct in6_addr) },
+       [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+       [TCA_FLOWER_KEY_ENC_IPV6_DST]   = { .len = sizeof(struct in6_addr) },
+       [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+       [TCA_FLOWER_KEY_TCP_SRC_MASK]   = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_TCP_DST_MASK]   = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_UDP_SRC_MASK]   = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_UDP_DST_MASK]   = { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -308,9 +353,29 @@ static void fl_set_key_val(struct nlattr **tb,
                memcpy(mask, nla_data(tb[mask_type]), len);
 }
 
+static void fl_set_key_vlan(struct nlattr **tb,
+                           struct flow_dissector_key_vlan *key_val,
+                           struct flow_dissector_key_vlan *key_mask)
+{
+#define VLAN_PRIORITY_MASK     0x7
+
+       if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+               key_val->vlan_id =
+                       nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+               key_mask->vlan_id = VLAN_VID_MASK;
+       }
+       if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+               key_val->vlan_priority =
+                       nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+                       VLAN_PRIORITY_MASK;
+               key_mask->vlan_priority = VLAN_PRIORITY_MASK;
+       }
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
                      struct fl_flow_key *key, struct fl_flow_key *mask)
 {
+       __be16 ethertype;
 #ifdef CONFIG_NET_CLS_IND
        if (tb[TCA_FLOWER_INDEV]) {
                int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -328,9 +393,20 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
                       sizeof(key->eth.src));
 
-       fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
-                      &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-                      sizeof(key->basic.n_proto));
+       if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
+               ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
+
+               if (ethertype == htons(ETH_P_8021Q)) {
+                       fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
+                       fl_set_key_val(tb, &key->basic.n_proto,
+                                      TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                                      &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+                                      sizeof(key->basic.n_proto));
+               } else {
+                       key->basic.n_proto = ethertype;
+                       mask->basic.n_proto = cpu_to_be16(~0);
+               }
+       }
 
        if (key->basic.n_proto == htons(ETH_P_IP) ||
            key->basic.n_proto == htons(ETH_P_IPV6)) {
@@ -359,20 +435,54 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 
        if (key->basic.ip_proto == IPPROTO_TCP) {
                fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-                              &mask->tp.src, TCA_FLOWER_UNSPEC,
+                              &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
                               sizeof(key->tp.src));
                fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-                              &mask->tp.dst, TCA_FLOWER_UNSPEC,
+                              &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
                               sizeof(key->tp.dst));
        } else if (key->basic.ip_proto == IPPROTO_UDP) {
                fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-                              &mask->tp.src, TCA_FLOWER_UNSPEC,
+                              &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
                               sizeof(key->tp.src));
                fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-                              &mask->tp.dst, TCA_FLOWER_UNSPEC,
+                              &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
                               sizeof(key->tp.dst));
        }
 
+       if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
+           tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
+               key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+               fl_set_key_val(tb, &key->enc_ipv4.src,
+                              TCA_FLOWER_KEY_ENC_IPV4_SRC,
+                              &mask->enc_ipv4.src,
+                              TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+                              sizeof(key->enc_ipv4.src));
+               fl_set_key_val(tb, &key->enc_ipv4.dst,
+                              TCA_FLOWER_KEY_ENC_IPV4_DST,
+                              &mask->enc_ipv4.dst,
+                              TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+                              sizeof(key->enc_ipv4.dst));
+       }
+
+       if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
+           tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
+               key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+               fl_set_key_val(tb, &key->enc_ipv6.src,
+                              TCA_FLOWER_KEY_ENC_IPV6_SRC,
+                              &mask->enc_ipv6.src,
+                              TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+                              sizeof(key->enc_ipv6.src));
+               fl_set_key_val(tb, &key->enc_ipv6.dst,
+                              TCA_FLOWER_KEY_ENC_IPV6_DST,
+                              &mask->enc_ipv6.dst,
+                              TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+                              sizeof(key->enc_ipv6.dst));
+       }
+
+       fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+                      &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+                      sizeof(key->enc_key_id.keyid));
+
        return 0;
 }
 
@@ -404,12 +514,10 @@ static int fl_init_hashtable(struct cls_fl_head *head,
 
 #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
 #define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
-#define FL_KEY_MEMBER_END_OFFSET(member)                                       \
-       (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
 
-#define FL_KEY_IN_RANGE(mask, member)                                          \
-        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&                  \
-         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+#define FL_KEY_IS_MASKED(mask, member)                                         \
+       memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member),               \
+                  0, FL_KEY_MEMBER_SIZE(member))                               \
 
 #define FL_KEY_SET(keys, cnt, id, member)                                      \
        do {                                                                    \
@@ -418,9 +526,9 @@ static int fl_init_hashtable(struct cls_fl_head *head,
                cnt++;                                                          \
        } while(0);
 
-#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)                    \
+#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)                      \
        do {                                                                    \
-               if (FL_KEY_IN_RANGE(mask, member))                              \
+               if (FL_KEY_IS_MASKED(mask, member))                             \
                        FL_KEY_SET(keys, cnt, id, member);                      \
        } while(0);
 
@@ -432,14 +540,16 @@ static void fl_init_dissector(struct cls_fl_head *head,
 
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
-       FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-                              FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
-       FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-                              FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
-       FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-                              FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-       FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-                              FLOW_DISSECTOR_KEY_PORTS, tp);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_PORTS, tp);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_VLAN, vlan);
 
        skb_flow_dissector_init(&head->dissector, keys, cnt);
 }
@@ -478,10 +588,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
        struct tcf_exts e;
        int err;
 
-       tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       if (err < 0)
+               goto errout;
 
        if (tb[TCA_FLOWER_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -550,7 +662,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        if (!fnew)
                return -ENOBUFS;
 
-       tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+       err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+       if (err < 0)
+               goto errout;
 
        if (!handle) {
                handle = fl_grab_new_handle(tp, head);
@@ -614,6 +728,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 errout:
+       tcf_exts_destroy(&fnew->exts);
        kfree(fnew);
        return err;
 }
@@ -668,6 +783,29 @@ static int fl_dump_key_val(struct sk_buff *skb,
        return 0;
 }
 
+static int fl_dump_key_vlan(struct sk_buff *skb,
+                           struct flow_dissector_key_vlan *vlan_key,
+                           struct flow_dissector_key_vlan *vlan_mask)
+{
+       int err;
+
+       if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
+               return 0;
+       if (vlan_mask->vlan_id) {
+               err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+                                 vlan_key->vlan_id);
+               if (err)
+                       return err;
+       }
+       if (vlan_mask->vlan_priority) {
+               err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+                                vlan_key->vlan_priority);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
 static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                   struct sk_buff *skb, struct tcmsg *t)
 {
@@ -712,6 +850,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                            &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
                            sizeof(key->basic.n_proto)))
                goto nla_put_failure;
+
+       if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+               goto nla_put_failure;
+
        if ((key->basic.n_proto == htons(ETH_P_IP) ||
             key->basic.n_proto == htons(ETH_P_IPV6)) &&
            fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
@@ -738,21 +880,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
        if (key->basic.ip_proto == IPPROTO_TCP &&
            (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-                            &mask->tp.src, TCA_FLOWER_UNSPEC,
+                            &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
                             sizeof(key->tp.src)) ||
             fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-                            &mask->tp.dst, TCA_FLOWER_UNSPEC,
+                            &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
                             sizeof(key->tp.dst))))
                goto nla_put_failure;
        else if (key->basic.ip_proto == IPPROTO_UDP &&
                 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-                                 &mask->tp.src, TCA_FLOWER_UNSPEC,
+                                 &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
                                  sizeof(key->tp.src)) ||
                  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-                                 &mask->tp.dst, TCA_FLOWER_UNSPEC,
+                                 &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
                                  sizeof(key->tp.dst))))
                goto nla_put_failure;
 
+       if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+           (fl_dump_key_val(skb, &key->enc_ipv4.src,
+                           TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
+                           TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+                           sizeof(key->enc_ipv4.src)) ||
+            fl_dump_key_val(skb, &key->enc_ipv4.dst,
+                            TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
+                            TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+                            sizeof(key->enc_ipv4.dst))))
+               goto nla_put_failure;
+       else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+                (fl_dump_key_val(skb, &key->enc_ipv6.src,
+                           TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
+                           TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+                           sizeof(key->enc_ipv6.src)) ||
+                fl_dump_key_val(skb, &key->enc_ipv6.dst,
+                                TCA_FLOWER_KEY_ENC_IPV6_DST,
+                                &mask->enc_ipv6.dst,
+                                TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+                           sizeof(key->enc_ipv6.dst))))
+               goto nla_put_failure;
+
+       if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+                           &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+                           sizeof(key->enc_key_id)))
+               goto nla_put_failure;
+
        nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
 
        if (tcf_exts_dump(skb, &f->exts))
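
FL_KEY_IS_MASKED above replaces the old offset-range heuristic with a direct
byte scan: memchr_inv(p, c, n) returns the first byte in the region that
differs from c, or NULL when all n bytes equal c, so a key member counts as
masked exactly when its mask bytes are not all zero. A standalone sketch of
the same check:

/* Sketch: true when any bit is set in the mask member at [off, off+len). */
static bool sketch_is_masked(const void *mask, size_t off, size_t len)
{
	return memchr_inv((const char *)mask + off, 0, len) != NULL;
}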
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index f23a3b6..cc0bda9 100644 (file)
@@ -195,10 +195,12 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
        u32 mask;
        int err;
 
-       tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       if (err < 0)
+               goto errout;
 
        if (tb[TCA_FW_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 #endif /* CONFIG_NET_CLS_IND */
                fnew->tp = f->tp;
 
-               tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+               err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+               if (err < 0) {
+                       kfree(fnew);
+                       return err;
+               }
 
                err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
                if (err < 0) {
+                       tcf_exts_destroy(&fnew->exts);
                        kfree(fnew);
                        return err;
                }
@@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
        if (f == NULL)
                return -ENOBUFS;
 
-       tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+       err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+       if (err < 0)
+               goto errout;
        f->id = handle;
        f->tp = tp;
 
@@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 errout:
+       tcf_exts_destroy(&f->exts);
        kfree(f);
        return err;
 }
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 08a3b0a..c91e65d 100644 (file)
@@ -383,17 +383,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
                            struct nlattr **tb, struct nlattr *est, int new,
                            bool ovr)
 {
-       int err;
        u32 id = 0, to = 0, nhandle = 0x8000;
        struct route4_filter *fp;
        unsigned int h1;
        struct route4_bucket *b;
        struct tcf_exts e;
+       int err;
 
-       tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       if (err < 0)
+               goto errout;
 
        err = -EINVAL;
        if (tb[TCA_ROUTE4_TO]) {
@@ -503,7 +505,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        if (!f)
                goto errout;
 
-       tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+       err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+       if (err < 0)
+               goto errout;
+
        if (fold) {
                f->id = fold->id;
                f->iif = fold->iif;
@@ -557,6 +562,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 errout:
+       tcf_exts_destroy(&f->exts);
        kfree(f);
        return err;
 }
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index f9c9fc0..4f05a19 100644 (file)
@@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                return err;
 
-       tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+       if (err < 0)
+               goto errout2;
 
        f = (struct rsvp_filter *)*arg;
        if (f) {
@@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
                        goto errout2;
                }
 
-               tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+               err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+               if (err < 0) {
+                       kfree(n);
+                       goto errout2;
+               }
 
                if (tb[TCA_RSVP_CLASSID]) {
                        n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
@@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        if (f == NULL)
                goto errout2;
 
-       tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+       err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+       if (err < 0)
+               goto errout;
        h2 = 16;
        if (tb[TCA_RSVP_SRC]) {
                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -627,6 +635,7 @@ insert:
        goto insert;
 
 errout:
+       tcf_exts_destroy(&f->exts);
        kfree(f);
 errout2:
        tcf_exts_destroy(&e);
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 944c8ff..d950070 100644 (file)
@@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
        [TCA_TCINDEX_CLASSID]           = { .type = NLA_U32 },
 };
 
-static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r)
 {
        memset(r, 0, sizeof(*r));
-       tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+       return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 }
 
 static void __tcindex_partial_destroy(struct rcu_head *head)
@@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head)
        kfree(p);
 }
 
+static void tcindex_free_perfect_hash(struct tcindex_data *cp)
+{
+       int i;
+
+       for (i = 0; i < cp->hash; i++)
+               tcf_exts_destroy(&cp->perfect[i].exts);
+       kfree(cp->perfect);
+}
+
+static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+{
+       int i, err = 0;
+
+       cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
+                             GFP_KERNEL);
+       if (!cp->perfect)
+               return -ENOMEM;
+
+       for (i = 0; i < cp->hash; i++) {
+               err = tcf_exts_init(&cp->perfect[i].exts,
+                                   TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+               if (err < 0)
+                       goto errout;
+       }
+
+       return 0;
+
+errout:
+       tcindex_free_perfect_hash(cp);
+       return err;
+}
+
 static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  u32 handle, struct tcindex_data *p,
                  struct tcindex_filter_result *r, struct nlattr **tb,
                  struct nlattr *est, bool ovr)
 {
-       int err, balloc = 0;
        struct tcindex_filter_result new_filter_result, *old_r = r;
        struct tcindex_filter_result cr;
-       struct tcindex_data *cp, *oldp;
+       struct tcindex_data *cp = NULL, *oldp;
        struct tcindex_filter *f = NULL; /* make gcc behave */
+       int err, balloc = 0;
        struct tcf_exts e;
 
-       tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       if (err < 0)
+               goto errout;
 
        err = -ENOMEM;
        /* tcindex_data attributes must look atomic to classifier/lookup so
@@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        if (p->perfect) {
                int i;
 
-               cp->perfect = kmemdup(p->perfect,
-                                     sizeof(*r) * cp->hash, GFP_KERNEL);
-               if (!cp->perfect)
+               if (tcindex_alloc_perfect_hash(cp) < 0)
                        goto errout;
                for (i = 0; i < cp->hash; i++)
-                       tcf_exts_init(&cp->perfect[i].exts,
-                                     TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+                       cp->perfect[i].res = p->perfect[i].res;
                balloc = 1;
        }
        cp->h = p->h;
 
-       tcindex_filter_result_init(&new_filter_result);
-       tcindex_filter_result_init(&cr);
+       err = tcindex_filter_result_init(&new_filter_result);
+       if (err < 0)
+               goto errout1;
+       err = tcindex_filter_result_init(&cr);
+       if (err < 0)
+               goto errout1;
        if (old_r)
                cr.res = r->res;
 
@@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = -ENOMEM;
        if (!cp->perfect && !cp->h) {
                if (valid_perfect_hash(cp)) {
-                       int i;
-
-                       cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
-                       if (!cp->perfect)
+                       if (tcindex_alloc_perfect_hash(cp) < 0)
                                goto errout_alloc;
-                       for (i = 0; i < cp->hash; i++)
-                               tcf_exts_init(&cp->perfect[i].exts,
-                                             TCA_TCINDEX_ACT,
-                                             TCA_TCINDEX_POLICE);
                        balloc = 1;
                } else {
                        struct tcindex_filter __rcu **hash;
@@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                if (!f)
                        goto errout_alloc;
                f->key = handle;
-               tcindex_filter_result_init(&f->result);
                f->next = NULL;
+               err = tcindex_filter_result_init(&f->result);
+               if (err < 0) {
+                       kfree(f);
+                       goto errout_alloc;
+               }
        }
 
        if (tb[TCA_TCINDEX_CLASSID]) {
@@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        else
                tcf_exts_change(tp, &cr.exts, &e);
 
-       if (old_r && old_r != r)
-               tcindex_filter_result_init(old_r);
+       if (old_r && old_r != r) {
+               err = tcindex_filter_result_init(old_r);
+               if (err < 0) {
+                       kfree(f);
+                       goto errout_alloc;
+               }
+       }
 
        oldp = p;
        r->res = cr.res;
@@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 
 errout_alloc:
        if (balloc == 1)
-               kfree(cp->perfect);
+               tcindex_free_perfect_hash(cp);
        else if (balloc == 2)
                kfree(cp->h);
+errout1:
+       tcf_exts_destroy(&cr.exts);
+       tcf_exts_destroy(&new_filter_result.exts);
 errout:
        kfree(cp);
        tcf_exts_destroy(&e);
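
The pattern above (and in the u32 hunks that follow) recurs everywhere tcf_exts_init() became fallible in this series: a failure before the init returns directly, while any failure after it must unwind through tcf_exts_destroy(). A minimal userspace sketch of that unwind discipline, with hypothetical ext_* helpers standing in for the tcf_exts API:

        #include <stdlib.h>

        struct exts { void *actions; };

        static int ext_init(struct exts *e)             /* may fail; allocates */
        {
                e->actions = malloc(16);
                return e->actions ? 0 : -1;
        }

        static int ext_validate(const struct exts *e)   /* may fail after init */
        {
                return e->actions ? 0 : -1;
        }

        static void ext_destroy(struct exts *e)         /* undoes ext_init() */
        {
                free(e->actions);
        }

        static int set_parms(void)
        {
                struct exts e;
                int err;

                err = ext_init(&e);
                if (err < 0)
                        return err;     /* nothing allocated yet: plain return */
                err = ext_validate(&e);
                if (err < 0)
                        goto errout;    /* allocated: must unwind */
                return 0;
        errout:
                ext_destroy(&e);
                return err;
        }

        int main(void)
        {
                return set_parms() ? 1 : 0;
        }
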
index ffe593e..a29263a 100644 (file)
@@ -709,13 +709,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
                         struct tc_u_knode *n, struct nlattr **tb,
                         struct nlattr *est, bool ovr)
 {
-       int err;
        struct tcf_exts e;
+       int err;
 
-       tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
        if (err < 0)
                return err;
+       err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+       if (err < 0)
+               goto errout;
 
        err = -EINVAL;
        if (tb[TCA_U32_LINK]) {
@@ -833,7 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
        new->tp = tp;
        memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 
-       tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+       if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+               kfree(new);
+               return NULL;
+       }
 
        return new;
 }
@@ -985,9 +990,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        n->handle = handle;
        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
        n->flags = flags;
-       tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
        n->tp = tp;
 
+       err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+       if (err < 0)
+               goto errout;
+
 #ifdef CONFIG_CLS_U32_MARK
        n->pcpu_success = alloc_percpu(u32);
        if (!n->pcpu_success) {
@@ -1028,9 +1036,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 errhw:
 #ifdef CONFIG_CLS_U32_MARK
        free_percpu(n->pcpu_success);
-errout:
 #endif
 
+errout:
+       tcf_exts_destroy(&n->exts);
 #ifdef CONFIG_CLS_U32_PERF
        free_percpu(n->pf);
 #endif
index 25aada7..d677b34 100644 (file)
@@ -260,6 +260,9 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 {
        struct Qdisc *q;
 
+       if (!qdisc_dev(root))
+               return (root->handle == handle ? root : NULL);
+
        if (!(root->flags & TCQ_F_BUILTIN) &&
            root->handle == handle)
                return root;
@@ -1432,7 +1435,7 @@ err_out:
 
 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                              struct netlink_callback *cb,
-                             int *q_idx_p, int s_q_idx)
+                             int *q_idx_p, int s_q_idx, bool recur)
 {
        int ret = 0, q_idx = *q_idx_p;
        struct Qdisc *q;
@@ -1451,6 +1454,16 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                        goto done;
                q_idx++;
        }
+
+       /* If dumping singletons, there is no qdisc_dev(root) and the
+        * singleton itself has already been dumped above.
+        *
+        * Likewise, when recursion is disabled (the ingress case), the
+        * top-level qdisc has already been dumped and the per-device
+        * qdisc hashtable must not be walked again.
+        */
+       if (!qdisc_dev(root) || !recur)
+               goto out;
+
        hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
                if (q_idx < s_q_idx) {
                        q_idx++;
@@ -1492,13 +1505,13 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                        s_q_idx = 0;
                q_idx = 0;
 
-               if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+               if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0)
                        goto done;
 
                dev_queue = dev_ingress_queue(dev);
                if (dev_queue &&
                    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
-                                      &q_idx, s_q_idx) < 0)
+                                      &q_idx, s_q_idx, false) < 0)
                        goto done;
 
 cont:
@@ -1775,6 +1788,9 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
        if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
                return -1;
 
+       if (!qdisc_dev(root))
+               return 0;
+
        hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
                if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
                        return -1;
index 18faecc..0d21b56 100644 (file)
@@ -641,18 +641,19 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
        struct Qdisc *sch;
 
        if (!try_module_get(ops->owner))
-               goto errout;
+               return NULL;
 
        sch = qdisc_alloc(dev_queue, ops);
-       if (IS_ERR(sch))
-               goto errout;
+       if (IS_ERR(sch)) {
+               module_put(ops->owner);
+               return NULL;
+       }
        sch->parent = parentid;
 
        if (!ops->init || ops->init(sch, NULL) == 0)
                return sch;
 
        qdisc_destroy(sch);
-errout:
        return NULL;
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
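
The qdisc_create_dflt() change above pairs every successful try_module_get() with a module_put() on the one failure path that previously leaked the reference. A small userspace sketch of that pairing, with a plain counter standing in for the module refcount:

        #include <stdio.h>

        static int refs;

        static int try_get(void)        /* stand-in for try_module_get() */
        {
                refs++;
                return 1;
        }

        static void put(void)           /* stand-in for module_put() */
        {
                refs--;
        }

        static void *create_dflt(int alloc_fails)
        {
                void *sch;

                if (!try_get())
                        return NULL;    /* nothing held: nothing to undo */
                sch = alloc_fails ? NULL : (void *)&refs;
                if (!sch) {
                        put();          /* the fix: drop the ref taken above */
                        return NULL;
                }
                return sch;
        }

        int main(void)
        {
                create_dflt(1);
                printf("refs leaked on failure: %d\n", refs);   /* 0 with the put() */
                return 0;
        }
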
index 912eb16..f99d485 100644 (file)
@@ -48,7 +48,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = {
                /* id 2 is reserved as well */
                .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2,
        },
-#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE)
+#if IS_ENABLED(CONFIG_CRYPTO_SHA256)
        {
                .hmac_id = SCTP_AUTH_HMAC_ID_SHA256,
                .hmac_name = "hmac(sha256)",
index c182db7..69444d3 100644 (file)
@@ -119,7 +119,13 @@ int sctp_rcv(struct sk_buff *skb)
                       skb_transport_offset(skb))
                goto discard_it;
 
-       if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
+       /* If the packet is fragmented and we need to do CRC checking,
+        * linearize it up front: computing the CRC over a linear buffer
+        * is cheaper than walking the fragments.
+        */
+       if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+            skb_linearize(skb)) ||
+           !pskb_may_pull(skb, sizeof(struct sctphdr)))
                goto discard_it;
 
        /* Pull up the IP header. */
@@ -1177,9 +1183,6 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
        if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
                return NULL;
 
-       if (skb_linearize(skb))
-               return NULL;
-
        ch = (sctp_chunkhdr_t *) skb->data;
 
        /* The code below will attempt to walk the chunk and extract
index c30ddb0..6437aa9 100644 (file)
@@ -170,19 +170,6 @@ next_chunk:
 
                chunk = list_entry(entry, struct sctp_chunk, list);
 
-               /* Linearize if it's not GSO */
-               if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
-                   skb_is_nonlinear(chunk->skb)) {
-                       if (skb_linearize(chunk->skb)) {
-                               __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
-                               sctp_chunk_free(chunk);
-                               goto next_chunk;
-                       }
-
-                       /* Update sctp_hdr as it probably changed */
-                       chunk->sctp_hdr = sctp_hdr(chunk->skb);
-               }
-
                if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
                        /* GSO-marked skbs but without frags, handle
                         * them normally
index 1f1682b..31b7bc3 100644 (file)
@@ -878,7 +878,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
                                        struct sctp_chunk *chunk,
                                        u16 chunk_len)
 {
-       size_t psize, pmtu;
+       size_t psize, pmtu, maxsize;
        sctp_xmit_t retval = SCTP_XMIT_OK;
 
        psize = packet->size;
@@ -906,6 +906,17 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
                        goto out;
                }
 
+               /* Similarly, if this chunk was built before a PMTU
+                * reduction, we have to fragment it at IP level now. So
+                * if the packet already contains something, we need to
+                * flush.
+                */
+               maxsize = pmtu - packet->overhead;
+               if (packet->auth)
+                       maxsize -= WORD_ROUND(packet->auth->skb->len);
+               if (chunk_len > maxsize)
+                       retval = SCTP_XMIT_PMTU_FULL;
+
                /* It is also okay to fragment if the chunk we are
                 * adding is a control chunk, but only if current packet
                 * is not a GSO one otherwise it causes fragmentation of
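
A small userspace illustration of the new fit check in sctp_packet_will_fit() above: a chunk built before a PMTU reduction is compared against the payload room left after the packet overhead and any bundled AUTH chunk, and the packet is flushed for IP-level fragmentation if the chunk no longer fits. WORD_ROUND is the real 4-byte-alignment macro; the sizes below are illustrative:

        #include <stdio.h>

        #define WORD_ROUND(n)   (((n) + 3) & ~3U)

        int main(void)
        {
                unsigned int pmtu = 1280;       /* after a route PMTU reduction */
                unsigned int overhead = 48;     /* IP + SCTP header overhead */
                unsigned int auth_len = 22;     /* bundled AUTH chunk, if any */
                unsigned int chunk_len = 1400;  /* chunk built for the old PMTU */

                unsigned int maxsize = pmtu - overhead - WORD_ROUND(auth_len);

                printf("maxsize=%u -> %s\n", maxsize,
                       chunk_len > maxsize ? "PMTU_FULL: flush, then IP-fragment"
                                           : "fits");
                return 0;
        }
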
index bb69153..807158e 100644 (file)
@@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
                               const struct inet_diag_req_v2 *req,
                               struct user_namespace *user_ns,
                               int portid, u32 seq, u16 nlmsg_flags,
-                              const struct nlmsghdr *unlh)
+                              const struct nlmsghdr *unlh,
+                              bool net_admin)
 {
        struct sctp_endpoint *ep = sctp_sk(sk)->ep;
        struct list_head *addr_list;
@@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
                r->idiag_retrans = 0;
        }
 
-       if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+       if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
                goto errout;
 
        if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
@@ -203,6 +204,7 @@ struct sctp_comm_param {
        struct netlink_callback *cb;
        const struct inet_diag_req_v2 *r;
        const struct nlmsghdr *nlh;
+       bool net_admin;
 };
 
 static size_t inet_assoc_attr_size(struct sctp_association *asoc)
@@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc)
                + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
                + nla_total_size(1) /* INET_DIAG_TOS */
                + nla_total_size(1) /* INET_DIAG_TCLASS */
+               + nla_total_size(4) /* INET_DIAG_MARK */
                + nla_total_size(addrlen * asoc->peer.transport_count)
                + nla_total_size(addrlen * addrcnt)
                + nla_total_size(sizeof(struct inet_diag_meminfo))
@@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
        err = inet_sctp_diag_fill(sk, assoc, rep, req,
                                  sk_user_ns(NETLINK_CB(in_skb).sk),
                                  NETLINK_CB(in_skb).portid,
-                                 nlh->nlmsg_seq, 0, nlh);
+                                 nlh->nlmsg_seq, 0, nlh,
+                                 commp->net_admin);
        release_sock(sk);
        if (err < 0) {
                WARN_ON(err == -EMSGSIZE);
@@ -310,7 +314,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
                                        sk_user_ns(NETLINK_CB(cb->skb).sk),
                                        NETLINK_CB(cb->skb).portid,
                                        cb->nlh->nlmsg_seq,
-                                       NLM_F_MULTI, cb->nlh) < 0) {
+                                       NLM_F_MULTI, cb->nlh,
+                                       commp->net_admin) < 0) {
                        cb->args[3] = 1;
                        err = 2;
                        goto release;
@@ -320,7 +325,8 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
                if (inet_sctp_diag_fill(sk, assoc, skb, r,
                                        sk_user_ns(NETLINK_CB(cb->skb).sk),
                                        NETLINK_CB(cb->skb).portid,
-                                       cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
+                                       cb->nlh->nlmsg_seq, 0, cb->nlh,
+                                       commp->net_admin) < 0) {
                        err = 2;
                        goto release;
                }
@@ -375,7 +381,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
                                sk_user_ns(NETLINK_CB(cb->skb).sk),
                                NETLINK_CB(cb->skb).portid,
                                cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                               cb->nlh) < 0) {
+                               cb->nlh, commp->net_admin) < 0) {
                err = 2;
                goto out;
        }
@@ -412,6 +418,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
                .skb = in_skb,
                .r = req,
                .nlh = nlh,
+               .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
        };
 
        if (req->sdiag_family == AF_INET) {
@@ -424,11 +431,13 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb,
                paddr.v4.sin_family = AF_INET;
        } else {
                laddr.v6.sin6_port = req->id.idiag_sport;
-               memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64);
+               memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
+                      sizeof(laddr.v6.sin6_addr));
                laddr.v6.sin6_family = AF_INET6;
 
                paddr.v6.sin6_port = req->id.idiag_dport;
-               memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64);
+               memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
+                      sizeof(paddr.v6.sin6_addr));
                paddr.v6.sin6_family = AF_INET6;
        }
 
@@ -445,6 +454,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
                .skb = skb,
                .cb = cb,
                .r = r,
+               .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
        };
 
        /* eps hashtable dumps
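
The net_admin flag threaded through the sctp_diag hunks above is resolved once per request with netlink_net_capable(..., CAP_NET_ADMIN) and then gates privileged per-socket attributes such as the newly accounted INET_DIAG_MARK. A toy sketch of that gating, with stand-in types and output:

        #include <stdbool.h>
        #include <stdio.h>

        struct sock_info { unsigned int mark; };

        static void fill_diag(const struct sock_info *sk, bool net_admin)
        {
                printf("state, ports, ...\n");  /* always emitted */
                if (net_admin)                  /* privileged requesters only */
                        printf("INET_DIAG_MARK=%u\n", sk->mark);
        }

        int main(void)
        {
                struct sock_info sk = { .mark = 42 };

                fill_diag(&sk, false);  /* unprivileged dump: no mark */
                fill_diag(&sk, true);   /* CAP_NET_ADMIN: mark included */
                return 0;
        }
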
index fd688c0..5c7549b 100644 (file)
@@ -26,7 +26,6 @@
 #include <net/strparser.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
-#include <net/tcp.h>
 
 static struct workqueue_struct *strp_wq;
 
@@ -80,9 +79,16 @@ static void strp_parser_err(struct strparser *strp, int err,
        strp->cb.abort_parser(strp, err);
 }
 
+static inline int strp_peek_len(struct strparser *strp)
+{
+       struct socket *sock = strp->sk->sk_socket;
+
+       return sock->ops->peek_len(sock);
+}
+
 /* Lower socket lock held */
-static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-                        unsigned int orig_offset, size_t orig_len)
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+                    unsigned int orig_offset, size_t orig_len)
 {
        struct strparser *strp = (struct strparser *)desc->arg.data;
        struct _strp_rx_msg *rxm;
@@ -266,12 +272,12 @@ static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
                if (extra < 0) {
                        /* Message not complete yet. */
                        if (rxm->strp.full_len - rxm->accum_len >
-                           tcp_inq(strp->sk)) {
+                           strp_peek_len(strp)) {
                                /* Don't have the whole message in the socket
                                 * buffer. Set strp->rx_need_bytes to wait for
                                 * the rest of the message. Also, set "early
                                 * eaten" since we've already buffered the skb
-                                * but don't consume yet per tcp_read_sock.
+                                * but don't consume yet per strp_read_sock.
                                 */
 
                                if (!rxm->accum_len) {
@@ -329,16 +335,17 @@ static int default_read_sock_done(struct strparser *strp, int err)
 }
 
 /* Called with lock held on lower socket */
-static int strp_tcp_read_sock(struct strparser *strp)
+static int strp_read_sock(struct strparser *strp)
 {
+       struct socket *sock = strp->sk->sk_socket;
        read_descriptor_t desc;
 
        desc.arg.data = strp;
        desc.error = 0;
        desc.count = 1; /* give more than one skb per call */
 
-       /* sk should be locked here, so okay to do tcp_read_sock */
-       tcp_read_sock(strp->sk, &desc, strp_tcp_recv);
+       /* sk should be locked here, so okay to do read_sock */
+       sock->ops->read_sock(strp->sk, &desc, strp_recv);
 
        desc.error = strp->cb.read_sock_done(strp, desc.error);
 
@@ -346,10 +353,8 @@ static int strp_tcp_read_sock(struct strparser *strp)
 }
 
 /* Lower sock lock held */
-void strp_tcp_data_ready(struct strparser *strp)
+void strp_data_ready(struct strparser *strp)
 {
-       struct sock *csk = strp->sk;
-
        if (unlikely(strp->rx_stopped))
                return;
 
@@ -360,7 +365,7 @@ void strp_tcp_data_ready(struct strparser *strp)
         * allows a thread in BH context to safely check if the process
         * lock is held. In this case, if the lock is held, queue work.
         */
-       if (sock_owned_by_user(csk)) {
+       if (sock_owned_by_user(strp->sk)) {
                queue_work(strp_wq, &strp->rx_work);
                return;
        }
@@ -369,30 +374,27 @@ void strp_tcp_data_ready(struct strparser *strp)
                return;
 
        if (strp->rx_need_bytes) {
-               if (tcp_inq(csk) >= strp->rx_need_bytes)
+               if (strp_peek_len(strp) >= strp->rx_need_bytes)
                        strp->rx_need_bytes = 0;
                else
                        return;
        }
 
-       if (strp_tcp_read_sock(strp) == -ENOMEM)
+       if (strp_read_sock(strp) == -ENOMEM)
                queue_work(strp_wq, &strp->rx_work);
 }
-EXPORT_SYMBOL_GPL(strp_tcp_data_ready);
+EXPORT_SYMBOL_GPL(strp_data_ready);
 
 static void do_strp_rx_work(struct strparser *strp)
 {
        read_descriptor_t rd_desc;
        struct sock *csk = strp->sk;
 
-       /* We need the read lock to synchronize with strp_tcp_data_ready. We
-        * need the socket lock for calling tcp_read_sock.
+       /* We need the read lock to synchronize with strp_data_ready. We
+        * need the socket lock for calling strp_read_sock.
         */
        lock_sock(csk);
 
-       if (unlikely(csk->sk_user_data != strp))
-               goto out;
-
        if (unlikely(strp->rx_stopped))
                goto out;
 
@@ -401,7 +403,7 @@ static void do_strp_rx_work(struct strparser *strp)
 
        rd_desc.arg.data = strp;
 
-       if (strp_tcp_read_sock(strp) == -ENOMEM)
+       if (strp_read_sock(strp) == -ENOMEM)
                queue_work(strp_wq, &strp->rx_work);
 
 out:
@@ -427,9 +429,14 @@ static void strp_rx_msg_timeout(unsigned long arg)
 int strp_init(struct strparser *strp, struct sock *csk,
              struct strp_callbacks *cb)
 {
+       struct socket *sock = csk->sk_socket;
+
        if (!cb || !cb->rcv_msg || !cb->parse_msg)
                return -EINVAL;
 
+       if (!sock->ops->read_sock || !sock->ops->peek_len)
+               return -EAFNOSUPPORT;
+
        memset(strp, 0, sizeof(*strp));
 
        strp->sk = csk;
@@ -448,7 +455,18 @@ int strp_init(struct strparser *strp, struct sock *csk,
 }
 EXPORT_SYMBOL_GPL(strp_init);
 
-/* strp must already be stopped so that strp_tcp_recv will no longer be called.
+void strp_unpause(struct strparser *strp)
+{
+       strp->rx_paused = 0;
+
+       /* Sync setting rx_paused with RX work */
+       smp_mb();
+
+       queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_unpause);
+
+/* strp must already be stopped so that strp_recv will no longer be called.
  * Note that strp_done is not called with the lower socket held.
  */
 void strp_done(struct strparser *strp)
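
The strparser hunks above replace the direct tcp_read_sock()/tcp_inq() calls with the lower socket's proto_ops, so any protocol that provides read_sock() and peek_len() can feed the parser, and strp_init() now rejects sockets that cannot. A toy sketch of that capability check, with a hypothetical ops struct:

        #include <errno.h>
        #include <stdio.h>

        struct ops {
                int (*read_sock)(void);
                int (*peek_len)(void);
        };

        static int strp_init_check(const struct ops *ops)
        {
                if (!ops->read_sock || !ops->peek_len)
                        return -EAFNOSUPPORT;   /* protocol can't feed the parser */
                return 0;
        }

        static int tcp_read_sock_stub(void) { return 0; }
        static int tcp_peek_len_stub(void)  { return 0; }

        int main(void)
        {
                struct ops tcp = { tcp_read_sock_stub, tcp_peek_len_stub };
                struct ops none = { NULL, NULL };

                printf("tcp-like: %d, unsupported: %d\n",
                       strp_init_check(&tcp), strp_init_check(&none));
                return 0;
        }
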
index 7f79fb7..66f23b3 100644 (file)
@@ -453,7 +453,7 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
        struct rpc_xprt_switch *xps;
 
        if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
-               WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+               WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
                xps = args->bc_xprt->xpt_bc_xps;
                xprt_switch_get(xps);
        } else {
@@ -520,7 +520,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
        char servername[48];
 
        if (args->bc_xprt) {
-               WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+               WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
                xprt = args->bc_xprt->xpt_bc_xprt;
                if (xprt) {
                        xprt_get(xprt);
index 9e90129..10b8193 100644 (file)
@@ -1042,7 +1042,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;
 
-       if (dump->idx < dump->cb->args[0])
+       if (dump->idx < dump->cb->args[2])
                goto skip;
 
        nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
@@ -1089,7 +1089,7 @@ nla_put_failure:
  */
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *dev,
-                           struct net_device *filter_dev, int idx)
+                           struct net_device *filter_dev, int *idx)
 {
        struct switchdev_fdb_dump dump = {
                .fdb.obj.orig_dev = dev,
@@ -1097,14 +1097,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                .dev = dev,
                .skb = skb,
                .cb = cb,
-               .idx = idx,
+               .idx = *idx,
        };
        int err;
 
        err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
                                      switchdev_port_fdb_dump_cb);
-       cb->args[1] = err;
-       return dump.idx;
+       *idx = dump.idx;
+       return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
 
@@ -1292,12 +1292,10 @@ bool switchdev_port_same_parent_id(struct net_device *a,
        struct switchdev_attr a_attr = {
                .orig_dev = a,
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-               .flags = SWITCHDEV_F_NO_RECURSE,
        };
        struct switchdev_attr b_attr = {
                .orig_dev = b,
                .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-               .flags = SWITCHDEV_F_NO_RECURSE,
        };
 
        if (switchdev_port_attr_get(a, &a_attr) ||
@@ -1307,88 +1305,3 @@ bool switchdev_port_same_parent_id(struct net_device *a,
        return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
 }
 EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
-static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
-                                      struct net_device *group_dev)
-{
-       struct net_device *lower_dev;
-       struct list_head *iter;
-
-       netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
-               if (lower_dev == dev)
-                       continue;
-               if (switchdev_port_same_parent_id(dev, lower_dev))
-                       return lower_dev->offload_fwd_mark;
-               return switchdev_port_fwd_mark_get(dev, lower_dev);
-       }
-
-       return dev->ifindex;
-}
-
-static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
-                                         u32 old_mark, u32 *reset_mark)
-{
-       struct net_device *lower_dev;
-       struct list_head *iter;
-
-       netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
-               if (lower_dev->offload_fwd_mark == old_mark) {
-                       if (!*reset_mark)
-                               *reset_mark = lower_dev->ifindex;
-                       lower_dev->offload_fwd_mark = *reset_mark;
-               }
-               switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
-       }
-}
-
-/**
- *     switchdev_port_fwd_mark_set - Set port offload forwarding mark
- *
- *     @dev: port device
- *     @group_dev: containing device
- *     @joining: true if dev is joining group; false if leaving group
- *
- *     An ungrouped port's offload mark is just its ifindex.  A grouped
- *     port's (member of a bridge, for example) offload mark is the ifindex
- *     of one of the ports in the group with the same parent (switch) ID.
- *     Ports on the same device in the same group will have the same mark.
- *
- *     Example:
- *
- *             br0             ifindex=9
- *               sw1p1         ifindex=2       mark=2
- *               sw1p2         ifindex=3       mark=2
- *               sw2p1         ifindex=4       mark=5
- *               sw2p2         ifindex=5       mark=5
- *
- *     If sw2p2 leaves the bridge, we'll have:
- *
- *             br0             ifindex=9
- *               sw1p1         ifindex=2       mark=2
- *               sw1p2         ifindex=3       mark=2
- *               sw2p1         ifindex=4       mark=4
- *             sw2p2           ifindex=5       mark=5
- */
-void switchdev_port_fwd_mark_set(struct net_device *dev,
-                                struct net_device *group_dev,
-                                bool joining)
-{
-       u32 mark = dev->ifindex;
-       u32 reset_mark = 0;
-
-       if (group_dev) {
-               ASSERT_RTNL();
-               if (joining)
-                       mark = switchdev_port_fwd_mark_get(dev, group_dev);
-               else if (dev->offload_fwd_mark == mark)
-                       /* Ohoh, this port was the mark reference port,
-                        * but it's leaving the group, so reset the
-                        * mark for the remaining ports in the group.
-                        */
-                       switchdev_port_fwd_mark_reset(group_dev, mark,
-                                                     &reset_mark);
-       }
-
-       dev->offload_fwd_mark = mark;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
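
The switchdev_port_fdb_dump() signature change above moves the dump index to an in/out pointer and returns the error code directly, instead of returning the index and smuggling the error through cb->args[]. A toy sketch of the new calling convention, with stand-in names:

        #include <stdio.h>

        static int fdb_dump(int *idx, int fail)
        {
                for (int i = 0; i < 3; i++)     /* pretend to emit 3 entries */
                        (*idx)++;
                return fail ? -1 : 0;           /* error travels via return value */
        }

        int main(void)
        {
                int idx = 0;
                int err = fdb_dump(&idx, 0);

                printf("err=%d idx=%d\n", err, idx);    /* err=0 idx=3 */
                return 0;
        }
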
index ae469b3..753f774 100644 (file)
@@ -269,18 +269,19 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
  *
  * RCU is locked, no other locks set
  */
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-                        struct tipc_msg *hdr)
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+                       struct tipc_msg *hdr)
 {
        struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
        struct sk_buff_head xmitq;
+       int rc = 0;
 
        __skb_queue_head_init(&xmitq);
 
        tipc_bcast_lock(net);
        if (msg_type(hdr) == STATE_MSG) {
                tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
-               tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+               rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
        } else {
                tipc_link_bc_init_rcv(l, hdr);
        }
@@ -291,6 +292,7 @@ void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
        /* Any socket wakeup messages ? */
        if (!skb_queue_empty(inputq))
                tipc_sk_rcv(net, inputq);
+       return rc;
 }
 
 /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
index d5e79b3..5ffe344 100644 (file)
@@ -56,8 +56,8 @@ int  tipc_bcast_get_mtu(struct net *net);
 int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
 int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
 void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-                        struct tipc_msg *hdr);
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+                       struct tipc_msg *hdr);
 int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
 int tipc_bclink_reset_stats(struct net *net);
index 65b1bbf..975dbeb 100644 (file)
@@ -42,6 +42,7 @@
 #include "monitor.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "udp_media.h"
 
 #define MAX_ADDR_STR 60
 
@@ -56,6 +57,13 @@ static struct tipc_media * const media_info_array[] = {
        NULL
 };
 
+static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
+{
+       struct tipc_net *tn = tipc_net(net);
+
+       return rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+}
+
 static void bearer_disable(struct net *net, struct tipc_bearer *b);
 
 /**
@@ -323,6 +331,7 @@ restart:
        b->domain = disc_domain;
        b->net_plane = bearer_id + 'A';
        b->priority = priority;
+       test_and_set_bit_lock(0, &b->up);
 
        res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
        if (res) {
@@ -360,15 +369,24 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
  */
 void tipc_bearer_reset_all(struct net *net)
 {
-       struct tipc_net *tn = tipc_net(net);
        struct tipc_bearer *b;
        int i;
 
        for (i = 0; i < MAX_BEARERS; i++) {
-               b = rcu_dereference_rtnl(tn->bearer_list[i]);
+               b = bearer_get(net, i);
+               if (b)
+                       clear_bit_unlock(0, &b->up);
+       }
+       for (i = 0; i < MAX_BEARERS; i++) {
+               b = bearer_get(net, i);
                if (b)
                        tipc_reset_bearer(net, b);
        }
+       for (i = 0; i < MAX_BEARERS; i++) {
+               b = bearer_get(net, i);
+               if (b)
+                       test_and_set_bit_lock(0, &b->up);
+       }
 }
 
 /**
@@ -382,8 +400,9 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
        int bearer_id = b->identity;
 
        pr_info("Disabling bearer <%s>\n", b->name);
-       b->media->disable_media(b);
+       clear_bit_unlock(0, &b->up);
        tipc_node_delete_links(net, bearer_id);
+       b->media->disable_media(b);
        RCU_INIT_POINTER(b->media_ptr, NULL);
        if (b->link_req)
                tipc_disc_delete(b->link_req);
@@ -440,22 +459,16 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
 {
        struct net_device *dev;
        int delta;
-       void *tipc_ptr;
 
        dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
        if (!dev)
                return 0;
 
-       /* Send RESET message even if bearer is detached from device */
-       tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
-       if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb))))
-               goto drop;
-
-       delta = dev->hard_header_len - skb_headroom(skb);
-       if ((delta > 0) &&
-           pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
-               goto drop;
-
+       delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+       if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) {
+               kfree_skb(skb);
+               return 0;
+       }
        skb_reset_network_header(skb);
        skb->dev = dev;
        skb->protocol = htons(ETH_P_TIPC);
@@ -463,9 +476,6 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
                        dev->dev_addr, skb->len);
        dev_queue_xmit(skb);
        return 0;
-drop:
-       kfree_skb(skb);
-       return 0;
 }
 
 int tipc_bearer_mtu(struct net *net, u32 bearer_id)
@@ -487,12 +497,12 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
                          struct sk_buff *skb,
                          struct tipc_media_addr *dest)
 {
-       struct tipc_net *tn = tipc_net(net);
+       struct tipc_msg *hdr = buf_msg(skb);
        struct tipc_bearer *b;
 
        rcu_read_lock();
-       b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-       if (likely(b))
+       b = bearer_get(net, bearer_id);
+       if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr))))
                b->media->send_msg(net, skb, b, dest);
        else
                kfree_skb(skb);
@@ -505,7 +515,6 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
                      struct sk_buff_head *xmitq,
                      struct tipc_media_addr *dst)
 {
-       struct tipc_net *tn = net_generic(net, tipc_net_id);
        struct tipc_bearer *b;
        struct sk_buff *skb, *tmp;
 
@@ -513,12 +522,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
                return;
 
        rcu_read_lock();
-       b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+       b = bearer_get(net, bearer_id);
        if (unlikely(!b))
                __skb_queue_purge(xmitq);
        skb_queue_walk_safe(xmitq, skb, tmp) {
                __skb_dequeue(xmitq);
-               b->media->send_msg(net, skb, b, dst);
+               if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb))))
+                       b->media->send_msg(net, skb, b, dst);
+               else
+                       kfree_skb(skb);
        }
        rcu_read_unlock();
 }
@@ -535,8 +547,8 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
        struct tipc_msg *hdr;
 
        rcu_read_lock();
-       b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-       if (unlikely(!b))
+       b = bearer_get(net, bearer_id);
+       if (unlikely(!b || !test_bit(0, &b->up)))
                __skb_queue_purge(xmitq);
        skb_queue_walk_safe(xmitq, skb, tmp) {
                hdr = buf_msg(skb);
@@ -566,7 +578,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
 
        rcu_read_lock();
        b = rcu_dereference_rtnl(dev->tipc_ptr);
-       if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) {
+       if (likely(b && test_bit(0, &b->up) &&
+                  (skb->pkt_type <= PACKET_BROADCAST))) {
                skb->next = NULL;
                tipc_rcv(dev_net(dev), skb, b);
                rcu_read_unlock();
@@ -591,18 +604,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct net *net = dev_net(dev);
-       struct tipc_net *tn = tipc_net(net);
        struct tipc_bearer *b;
-       int i;
 
        b = rtnl_dereference(dev->tipc_ptr);
-       if (!b) {
-               for (i = 0; i < MAX_BEARERS; b = NULL, i++) {
-                       b = rtnl_dereference(tn->bearer_list[i]);
-                       if (b && (b->media_ptr == dev))
-                               break;
-               }
-       }
        if (!b)
                return NOTIFY_DONE;
 
@@ -613,11 +617,10 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
                if (netif_carrier_ok(dev))
                        break;
        case NETDEV_UP:
-               rcu_assign_pointer(dev->tipc_ptr, b);
+               test_and_set_bit_lock(0, &b->up);
                break;
        case NETDEV_GOING_DOWN:
-               RCU_INIT_POINTER(dev->tipc_ptr, NULL);
-               synchronize_net();
+               clear_bit_unlock(0, &b->up);
                tipc_reset_bearer(net, b);
                break;
        case NETDEV_CHANGEMTU:
@@ -709,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
                goto prop_msg_full;
 
        nla_nest_end(msg->skb, prop);
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+       if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) {
+               if (tipc_udp_nl_add_bearer_data(msg, bearer))
+                       goto attr_msg_full;
+       }
+#endif
+
        nla_nest_end(msg->skb, attrs);
        genlmsg_end(msg->skb, hdr);
 
@@ -895,6 +906,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
+{
+       int err;
+       char *name;
+       struct tipc_bearer *b;
+       struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+       struct net *net = sock_net(skb->sk);
+
+       if (!info->attrs[TIPC_NLA_BEARER])
+               return -EINVAL;
+
+       err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
+                              info->attrs[TIPC_NLA_BEARER],
+                              tipc_nl_bearer_policy);
+       if (err)
+               return err;
+
+       if (!attrs[TIPC_NLA_BEARER_NAME])
+               return -EINVAL;
+       name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+       rtnl_lock();
+       b = tipc_bearer_find(net, name);
+       if (!b) {
+               rtnl_unlock();
+               return -EINVAL;
+       }
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+       if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+               err = tipc_udp_nl_bearer_add(b,
+                                            attrs[TIPC_NLA_BEARER_UDP_OPTS]);
+               if (err) {
+                       rtnl_unlock();
+                       return err;
+               }
+       }
+#endif
+       rtnl_unlock();
+
+       return 0;
+}
+
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 {
        int err;
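
The bearer hunks above gate all send and receive paths on bit 0 of the new b->up word: it is set with test_and_set_bit_lock(), cleared with clear_bit_unlock(), and while it is clear only RESET messages may pass. A rough userspace model using C11 atomics (the kernel primitives additionally give acquire/release ordering on the bit):

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <stdio.h>

        struct bearer { atomic_ulong up; };

        static struct bearer b;                 /* zero-initialized: bearer down */

        static void bearer_enable(void)         /* ~ test_and_set_bit_lock(0, &b->up) */
        {
                atomic_fetch_or(&b.up, 1UL);
        }

        static void bearer_disable(void)        /* ~ clear_bit_unlock(0, &b->up) */
        {
                atomic_fetch_and(&b.up, ~1UL);
        }

        static bool bearer_xmit(bool is_reset)
        {
                if (!(atomic_load(&b.up) & 1UL) && !is_reset)
                        return false;           /* drop: bearer not up */
                return true;                    /* hand the skb to the media layer */
        }

        int main(void)
        {
                bearer_enable();
                printf("data while up:    %d\n", bearer_xmit(false));
                bearer_disable();
                printf("data while down:  %d\n", bearer_xmit(false));
                printf("reset while down: %d\n", bearer_xmit(true));
                return 0;
        }
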
index 43757f1..78892e2 100644 (file)
@@ -150,6 +150,7 @@ struct tipc_bearer {
        u32 identity;
        struct tipc_link_req *link_req;
        char net_plane;
+       unsigned long up;
 };
 
 struct tipc_bearer_names {
@@ -180,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
index 877d94f..b36e16c 100644 (file)
@@ -181,7 +181,10 @@ struct tipc_link {
        u16 acked;
        struct tipc_link *bc_rcvlink;
        struct tipc_link *bc_sndlink;
-       int nack_state;
+       unsigned long prev_retr;
+       u16 prev_from;
+       u16 prev_to;
+       u8 nack_state;
        bool bc_peer_is_up;
 
        /* Statistics */
@@ -202,6 +205,8 @@ enum {
        BC_NACK_SND_SUPPRESS,
 };
 
+#define TIPC_BC_RETR_LIMIT 10   /* [ms] */
+
 /*
  * Interval between NACKs when packets arrive out of order
  */
@@ -237,8 +242,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
                                      u16 rcvgap, int tolerance, int priority,
                                      struct sk_buff_head *xmitq);
 static void link_print(struct tipc_link *l, const char *str);
-static void tipc_link_build_nack_msg(struct tipc_link *l,
-                                    struct sk_buff_head *xmitq);
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+                                   struct sk_buff_head *xmitq);
 static void tipc_link_build_bc_init_msg(struct tipc_link *l,
                                        struct sk_buff_head *xmitq);
 static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
@@ -367,6 +372,18 @@ int tipc_link_bc_peers(struct tipc_link *l)
        return l->ackers;
 }
 
+u16 link_bc_rcv_gap(struct tipc_link *l)
+{
+       struct sk_buff *skb = skb_peek(&l->deferdq);
+       u16 gap = 0;
+
+       if (more(l->snd_nxt, l->rcv_nxt))
+               gap = l->snd_nxt - l->rcv_nxt;
+       if (skb)
+               gap = buf_seqno(skb) - l->rcv_nxt;
+       return gap;
+}
+
 void tipc_link_set_mtu(struct tipc_link *l, int mtu)
 {
        l->mtu = mtu;
@@ -807,7 +824,7 @@ void link_prepare_wakeup(struct tipc_link *l)
 
        skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
                imp = TIPC_SKB_CB(skb)->chain_imp;
-               lim = l->window + l->backlog[imp].limit;
+               lim = l->backlog[imp].limit;
                pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
                if ((pnd[imp] + l->backlog[imp].len) >= lim)
                        break;
@@ -873,9 +890,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
        struct sk_buff *skb, *_skb, *bskb;
 
        /* Match msg importance against this and all higher backlog limits: */
-       for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
-               if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
-                       return link_schedule_user(l, list);
+       if (!skb_queue_empty(backlogq)) {
+               for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+                       if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
+                               return link_schedule_user(l, list);
+               }
        }
        if (unlikely(msg_size(hdr) > mtu)) {
                skb_queue_purge(list);
@@ -1133,7 +1152,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
                if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
                        return 0;
                l->rcv_unacked = 0;
-               return TIPC_LINK_SND_BC_ACK;
+
+               /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */
+               l->snd_nxt = l->rcv_nxt;
+               return TIPC_LINK_SND_STATE;
        }
 
        /* Unicast ACK */
@@ -1162,17 +1184,26 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
 }
 
 /* tipc_link_build_nack_msg: prepare link nack message for transmission
+ * Note that sending of broadcast NACK is coordinated among nodes, to
+ * reduce the risk of NACK storms towards the sender.
  */
-static void tipc_link_build_nack_msg(struct tipc_link *l,
-                                    struct sk_buff_head *xmitq)
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+                                   struct sk_buff_head *xmitq)
 {
        u32 def_cnt = ++l->stats.deferred_recv;
+       int match1, match2;
 
-       if (link_is_bc_rcvlink(l))
-               return;
+       if (link_is_bc_rcvlink(l)) {
+               match1 = def_cnt & 0xf;
+               match2 = tipc_own_addr(l->net) & 0xf;
+               if (match1 == match2)
+                       return TIPC_LINK_SND_STATE;
+               return 0;
+       }
 
        if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
                tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+       return 0;
 }
 
 /* tipc_link_rcv - process TIPC packets/messages arriving from off-node
@@ -1223,7 +1254,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
                /* Defer delivery if sequence gap */
                if (unlikely(seqno != rcv_nxt)) {
                        __tipc_skb_queue_sorted(defq, seqno, skb);
-                       tipc_link_build_nack_msg(l, xmitq);
+                       rc |= tipc_link_build_nack_msg(l, xmitq);
                        break;
                }
 
@@ -1234,7 +1265,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
                        rc |= tipc_link_input(l, skb, l->inputq);
                if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
                        rc |= tipc_link_build_state_msg(l, xmitq);
-               if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK))
+               if (unlikely(rc & ~TIPC_LINK_SND_STATE))
                        break;
        } while ((skb = __skb_dequeue(defq)));
 
@@ -1248,10 +1279,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
                                      u16 rcvgap, int tolerance, int priority,
                                      struct sk_buff_head *xmitq)
 {
+       struct tipc_link *bcl = l->bc_rcvlink;
        struct sk_buff *skb;
        struct tipc_msg *hdr;
        struct sk_buff_head *dfq = &l->deferdq;
-       bool node_up = link_is_up(l->bc_rcvlink);
+       bool node_up = link_is_up(bcl);
        struct tipc_mon_state *mstate = &l->mon_state;
        int dlen = 0;
        void *data;
@@ -1279,7 +1311,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
        msg_set_net_plane(hdr, l->net_plane);
        msg_set_next_sent(hdr, l->snd_nxt);
        msg_set_ack(hdr, l->rcv_nxt - 1);
-       msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1);
+       msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
        msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
        msg_set_link_tolerance(hdr, tolerance);
        msg_set_linkprio(hdr, priority);
@@ -1289,6 +1321,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 
        if (mtyp == STATE_MSG) {
                msg_set_seq_gap(hdr, rcvgap);
+               msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
                msg_set_probe(hdr, probe);
                tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
                msg_set_size(hdr, INT_H_SIZE + dlen);
@@ -1571,51 +1604,107 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
                l->rcv_nxt = peers_snd_nxt;
 }
 
+/* link_bc_retr_eval() - check if the indicated range can be retransmitted now
+ * - Adjust permitted range if there is overlap with previous retransmission
+ */
+static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
+{
+       unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
+
+       if (less(*to, *from))
+               return false;
+
+       /* New retransmission request */
+       if ((elapsed > TIPC_BC_RETR_LIMIT) ||
+           less(*to, l->prev_from) || more(*from, l->prev_to)) {
+               l->prev_from = *from;
+               l->prev_to = *to;
+               l->prev_retr = jiffies;
+               return true;
+       }
+
+       /* Inside range of previous retransmit */
+       if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
+               return false;
+
+       /* Fully or partially outside previous range => exclude overlap */
+       if (less(*from, l->prev_from)) {
+               *to = l->prev_from - 1;
+               l->prev_from = *from;
+       }
+       if (more(*to, l->prev_to)) {
+               *from = l->prev_to + 1;
+               l->prev_to = *to;
+       }
+       l->prev_retr = jiffies;
+       return true;
+}
+
 /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
  */
-void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
-                          struct sk_buff_head *xmitq)
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+                         struct sk_buff_head *xmitq)
 {
+       struct tipc_link *snd_l = l->bc_sndlink;
        u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+       u16 from = msg_bcast_ack(hdr) + 1;
+       u16 to = from + msg_bc_gap(hdr) - 1;
+       int rc = 0;
 
        if (!link_is_up(l))
-               return;
+               return rc;
 
        if (!msg_peer_node_is_up(hdr))
-               return;
+               return rc;
 
        /* Open when peer acknowledges our bcast init msg (pkt #1) */
        if (msg_ack(hdr))
                l->bc_peer_is_up = true;
 
        if (!l->bc_peer_is_up)
-               return;
+               return rc;
+
+       l->stats.recv_nacks++;
 
        /* Ignore if peers_snd_nxt goes beyond receive window */
        if (more(peers_snd_nxt, l->rcv_nxt + l->window))
-               return;
+               return rc;
+
+       if (link_bc_retr_eval(snd_l, &from, &to))
+               rc = tipc_link_retrans(snd_l, from, to, xmitq);
+
+       l->snd_nxt = peers_snd_nxt;
+       if (link_bc_rcv_gap(l))
+               rc |= TIPC_LINK_SND_STATE;
+
+       /* Return now if sender supports nack via STATE messages */
+       if (l->peer_caps & TIPC_BCAST_STATE_NACK)
+               return rc;
+
+       /* Otherwise, be backwards compatible */
 
        if (!more(peers_snd_nxt, l->rcv_nxt)) {
                l->nack_state = BC_NACK_SND_CONDITIONAL;
-               return;
+               return 0;
        }
 
        /* Don't NACK if one was recently sent or peeked */
        if (l->nack_state == BC_NACK_SND_SUPPRESS) {
                l->nack_state = BC_NACK_SND_UNCONDITIONAL;
-               return;
+               return 0;
        }
 
        /* Conditionally delay NACK sending until next synch rcv */
        if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
                l->nack_state = BC_NACK_SND_UNCONDITIONAL;
                if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
-                       return;
+                       return 0;
        }
 
        /* Send NACK now but suppress next one */
        tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
        l->nack_state = BC_NACK_SND_SUPPRESS;
+       return 0;
 }
 
 void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
@@ -1652,6 +1741,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
 }
 
 /* tipc_link_bc_nack_rcv(): receive broadcast nack message
+ * This function is kept only for backwards compatibility: from
+ * TIPC v2.5 on, broadcast NACKs are carried in unicast STATE
+ * messages instead of dedicated BCAST_PROTOCOL messages.
  */
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
                          struct sk_buff_head *xmitq)
@@ -1692,10 +1783,10 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
        int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
 
        l->window = win;
-       l->backlog[TIPC_LOW_IMPORTANCE].limit      = win / 2;
-       l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = win;
-       l->backlog[TIPC_HIGH_IMPORTANCE].limit     = win / 2 * 3;
-       l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2;
+       l->backlog[TIPC_LOW_IMPORTANCE].limit      = max_t(u16, 50, win);
+       l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = max_t(u16, 100, win * 2);
+       l->backlog[TIPC_HIGH_IMPORTANCE].limit     = max_t(u16, 150, win * 3);
+       l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
        l->backlog[TIPC_SYSTEM_IMPORTANCE].limit   = max_bulk;
 }
 
index d7e9d42..d1bd178 100644 (file)
@@ -63,7 +63,7 @@ enum {
 enum {
        TIPC_LINK_UP_EVT       = 1,
        TIPC_LINK_DOWN_EVT     = (1 << 1),
-       TIPC_LINK_SND_BC_ACK   = (1 << 2)
+       TIPC_LINK_SND_STATE    = (1 << 2)
 };
 
 /* Starting value for maximum packet size negotiation on unicast links
@@ -138,8 +138,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
 void tipc_link_build_bc_sync_msg(struct tipc_link *l,
                                 struct sk_buff_head *xmitq);
 void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
-void tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
-                          struct sk_buff_head *xmitq);
+int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
+                         struct sk_buff_head *xmitq);
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
                          struct sk_buff_head *xmitq);
 #endif
index 7cf52fb..c3832cd 100644 (file)
@@ -719,6 +719,16 @@ static inline char *msg_media_addr(struct tipc_msg *m)
        return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
 }
 
+static inline u32 msg_bc_gap(struct tipc_msg *m)
+{
+       return msg_bits(m, 8, 0, 0x3ff);
+}
+
+static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n)
+{
+       msg_set_bits(m, 8, 0, 0x3ff, n);
+}
+
 /*
  * Word 9
  */
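
The new header accessors above carve a 10-bit broadcast-gap field out of the low bits of header word 8, so the largest gap a STATE message can report is 0x3ff (1023) packets. A toy model on a bare u32 word (the kernel's msg_bits()/msg_set_bits() also handle byte order):

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t word8;

        static void set_bc_gap(uint32_t n)
        {
                word8 &= ~0x3ffu;               /* clear the 10-bit field */
                word8 |= n & 0x3ffu;            /* values > 1023 are truncated */
        }

        static uint32_t bc_gap(void)
        {
                return word8 & 0x3ffu;
        }

        int main(void)
        {
                set_bc_gap(5);
                printf("gap=%u\n", bc_gap());   /* 5 */
                set_bc_gap(1500);
                printf("gap=%u\n", bc_gap());   /* 476: field wraps by masking */
                return 0;
        }
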
index 6b626a6..a04fe9b 100644 (file)
@@ -62,6 +62,8 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 
 /**
  * named_prepare_buf - allocate & initialize a publication message
+ *
+ * The buffer returned is of size INT_H_SIZE + payload size
  */
 static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
                                         u32 dest)
@@ -141,9 +143,9 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
        struct publication *publ;
        struct sk_buff *skb = NULL;
        struct distr_item *item = NULL;
-       uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) *
-                       ITEM_SIZE;
-       uint msg_rem = msg_dsz;
+       u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+                       ITEM_SIZE) * ITEM_SIZE;
+       u32 msg_rem = msg_dsz;
 
        list_for_each_entry(publ, pls, local_list) {
                /* Prepare next buffer: */
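
The named_distribute() fix above accounts for the INT_H_SIZE header that named_prepare_buf() prepends before rounding the payload down to a whole number of distribution items; the old formula could build bulk messages that exceeded the link MTU. Worked through with the real constants (ITEM_SIZE = 20, INT_H_SIZE = 40):

        #include <stdio.h>

        #define ITEM_SIZE       20
        #define INT_H_SIZE      40

        int main(void)
        {
                unsigned int mtu = 1500;
                unsigned int old = (mtu / ITEM_SIZE) * ITEM_SIZE;
                unsigned int new = ((mtu - INT_H_SIZE) / ITEM_SIZE) * ITEM_SIZE;

                printf("old payload=%u -> packet=%u (over MTU)\n",
                       old, old + INT_H_SIZE);          /* 1500 -> 1540 */
                printf("new payload=%u -> packet=%u (fits)\n",
                       new, new + INT_H_SIZE);          /* 1460 -> 1500 */
                return 0;
        }
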
index 77a7a11..c7c2549 100644 (file)
@@ -39,6 +39,8 @@
 
 #include <net/genetlink.h>
 
+extern const struct nla_policy tipc_nl_net_policy[];
+
 int tipc_net_start(struct net *net, u32 addr);
 
 void tipc_net_stop(struct net *net);
index a84daec..3200059 100644 (file)
@@ -41,6 +41,7 @@
 #include "link.h"
 #include "node.h"
 #include "net.h"
+#include "udp_media.h"
 #include <net/genetlink.h>
 
 static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
@@ -160,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
                .dumpit = tipc_nl_bearer_dump,
                .policy = tipc_nl_policy,
        },
+       {
+               .cmd    = TIPC_NL_BEARER_ADD,
+               .doit   = tipc_nl_bearer_add,
+               .policy = tipc_nl_policy,
+       },
        {
                .cmd    = TIPC_NL_BEARER_SET,
                .doit   = tipc_nl_bearer_set,
@@ -238,6 +244,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
                .dumpit = tipc_nl_node_dump_monitor_peer,
                .policy = tipc_nl_policy,
        },
+       {
+               .cmd    = TIPC_NL_PEER_REMOVE,
+               .doit   = tipc_nl_peer_rm,
+               .policy = tipc_nl_policy,
+       },
+#ifdef CONFIG_TIPC_MEDIA_UDP
+       {
+               .cmd    = TIPC_NL_UDP_GET_REMOTEIP,
+               .dumpit = tipc_udp_nl_dump_remoteip,
+               .policy = tipc_nl_policy,
+       },
+#endif
 };
 
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
index 2197419..7ef14e2 100644 (file)
@@ -1262,6 +1262,34 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
        kfree_skb(skb);
 }
 
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+                                 int bearer_id, struct sk_buff_head *xmitq)
+{
+       struct tipc_link *ucl;
+       int rc;
+
+       rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+
+       if (rc & TIPC_LINK_DOWN_EVT) {
+               tipc_bearer_reset_all(n->net);
+               return;
+       }
+
+       if (!(rc & TIPC_LINK_SND_STATE))
+               return;
+
+       /* If probe message, a STATE response will be sent anyway */
+       if (msg_probe(hdr))
+               return;
+
+       /* Produce a STATE message carrying broadcast NACK */
+       tipc_node_read_lock(n);
+       ucl = n->links[bearer_id].link;
+       if (ucl)
+               tipc_link_build_state_msg(ucl, xmitq);
+       tipc_node_read_unlock(n);
+}
+
 /**
  * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
  * @net: the applicable net namespace
@@ -1298,7 +1326,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
        rc = tipc_bcast_rcv(net, be->link, skb);
 
        /* Broadcast ACKs are sent on a unicast link */
-       if (rc & TIPC_LINK_SND_BC_ACK) {
+       if (rc & TIPC_LINK_SND_STATE) {
                tipc_node_read_lock(n);
                tipc_link_build_state_msg(le->link, &xmitq);
                tipc_node_read_unlock(n);
@@ -1505,7 +1533,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 
        /* Ensure broadcast reception is in synch with peer's send state */
        if (unlikely(usr == LINK_PROTOCOL))
-               tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
+               tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
        else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
                tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
 
@@ -1553,6 +1581,69 @@ discard:
        kfree_skb(skb);
 }
 
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = sock_net(skb->sk);
+       struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+       struct tipc_node *peer;
+       u32 addr;
+       int err;
+       int i;
+
+       /* We identify the peer by its net */
+       if (!info->attrs[TIPC_NLA_NET])
+               return -EINVAL;
+
+       err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
+                              info->attrs[TIPC_NLA_NET],
+                              tipc_nl_net_policy);
+       if (err)
+               return err;
+
+       if (!attrs[TIPC_NLA_NET_ADDR])
+               return -EINVAL;
+
+       addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+
+       if (in_own_node(net, addr))
+               return -ENOTSUPP;
+
+       spin_lock_bh(&tn->node_list_lock);
+       peer = tipc_node_find(net, addr);
+       if (!peer) {
+               spin_unlock_bh(&tn->node_list_lock);
+               return -ENXIO;
+       }
+
+       tipc_node_write_lock(peer);
+       if (peer->state != SELF_DOWN_PEER_DOWN &&
+           peer->state != SELF_DOWN_PEER_LEAVING) {
+               tipc_node_write_unlock(peer);
+               err = -EBUSY;
+               goto err_out;
+       }
+
+       for (i = 0; i < MAX_BEARERS; i++) {
+               struct tipc_link_entry *le = &peer->links[i];
+
+               if (le->link) {
+                       kfree(le->link);
+                       le->link = NULL;
+                       peer->link_cnt--;
+               }
+       }
+       tipc_node_write_unlock(peer);
+       tipc_node_delete(peer);
+
+       err = 0;
+err_out:
+       tipc_node_put(peer);
+       spin_unlock_bh(&tn->node_list_lock);
+
+       return err;
+}
+
 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
        int err;
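
tipc_nl_peer_rm() above expects the doomed peer's 32-bit TIPC address inside a TIPC_NLA_NET nest, mirroring its nla_parse_nested() call. A hedged userspace fragment for building that request, reusing the libnl-3 setup from the earlier TIPC_NL_BEARER_ADD sketch (the address <1.1.2> is illustrative):

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <linux/tipc_netlink.h>

/* Sketch: fill a prepared genl message for TIPC_NL_PEER_REMOVE.
 * Attribute layout inferred from tipc_nl_peer_rm() above. */
static int build_peer_remove(struct nl_msg *msg, int family, uint32_t addr)
{
	struct nlattr *net_nest;

	if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
			 TIPC_NL_PEER_REMOVE, 1))
		return -1;
	net_nest = nla_nest_start(msg, TIPC_NLA_NET);
	if (!net_nest)
		return -1;
	if (nla_put_u32(msg, TIPC_NLA_NET_ADDR, addr))	/* e.g. 0x01001002 = <1.1.2> */
		return -1;
	nla_nest_end(msg, net_nest);
	return 0;
}

Note that the kernel side above refuses with -EBUSY unless the peer is already down, and with -ENOTSUPP for the node's own address.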
index d69fdfc..39ef54c 100644
@@ -1,7 +1,7 @@
 /*
  * net/tipc/node.h: Include file for TIPC node management routines
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2016, Ericsson AB
  * Copyright (c) 2005, 2010-2014, Wind River Systems
  * All rights reserved.
  *
 /* Optional capabilities supported by this code version
  */
 enum {
-       TIPC_BCAST_SYNCH   = (1 << 1),
-       TIPC_BLOCK_FLOWCTL = (2 << 1)
+       TIPC_BCAST_SYNCH      = (1 << 1),
+       TIPC_BCAST_STATE_NACK = (1 << 2),
+       TIPC_BLOCK_FLOWCTL    = (1 << 3)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
+                               TIPC_BCAST_STATE_NACK | \
+                               TIPC_BLOCK_FLOWCTL)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
@@ -77,6 +80,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
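
The capability renumbering above is not cosmetic. The old TIPC_BLOCK_FLOWCTL value, (2 << 1), evaluates to 4, which is exactly the bit the new TIPC_BCAST_STATE_NACK capability needs as (1 << 2); adding the new flag without renumbering would have aliased two capabilities. A standalone C11 snippet (not part of the patch) makes the collision explicit:

#include <assert.h>	/* static_assert (C11) */

enum {
	OLD_TIPC_BCAST_SYNCH      = (1 << 1),	/* == 2 */
	OLD_TIPC_BLOCK_FLOWCTL    = (2 << 1),	/* == 4, same as (1 << 2) */
	NEW_TIPC_BCAST_STATE_NACK = (1 << 2)	/* == 4: would collide */
};

static_assert(OLD_TIPC_BLOCK_FLOWCTL == NEW_TIPC_BCAST_STATE_NACK,
	      "old flow-control bit aliases the new NACK capability");

int main(void) { return 0; }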
index b016c01..d80cd3f 100644
@@ -49,6 +49,7 @@
 #include "core.h"
 #include "bearer.h"
 #include "netlink.h"
+#include "msg.h"
 
 /* IANA assigned UDP port */
 #define UDP_PORT_DEFAULT       6118
@@ -70,6 +71,13 @@ struct udp_media_addr {
        };
 };
 
+/* struct udp_replicast - container for UDP remote addresses */
+struct udp_replicast {
+       struct udp_media_addr addr;
+       struct rcu_head rcu;
+       struct list_head list;
+};
+
 /**
  * struct udp_bearer - ip/udp bearer data structure
  * @bearer:    associated generic tipc bearer
@@ -82,8 +90,20 @@ struct udp_bearer {
        struct socket *ubsock;
        u32 ifindex;
        struct work_struct work;
+       struct udp_replicast rcast;
 };
 
+static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
+{
+       if (ntohs(addr->proto) == ETH_P_IP)
+               return ipv4_is_multicast(addr->ipv4.s_addr);
+#if IS_ENABLED(CONFIG_IPV6)
+       else
+               return ipv6_addr_is_multicast(&addr->ipv6);
+#endif
+       return 0;
+}
+
 /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
 static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
                                    struct udp_media_addr *ua)
@@ -91,15 +111,9 @@ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
        memset(addr, 0, sizeof(struct tipc_media_addr));
        addr->media_id = TIPC_MEDIA_TYPE_UDP;
        memcpy(addr->value, ua, sizeof(struct udp_media_addr));
-       if (ntohs(ua->proto) == ETH_P_IP) {
-               if (ipv4_is_multicast(ua->ipv4.s_addr))
-                       addr->broadcast = 1;
-       } else if (ntohs(ua->proto) == ETH_P_IPV6) {
-               if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
-                       addr->broadcast = 1;
-       } else {
-               pr_err("Invalid UDP media address\n");
-       }
+
+       if (tipc_udp_is_mcast_addr(ua))
+               addr->broadcast = 1;
 }
 
 /* tipc_udp_addr2str - convert ip/udp address to string */
@@ -140,28 +154,13 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
 }
 
 /* tipc_send_msg - enqueue a send request */
-static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
-                            struct tipc_bearer *b,
-                            struct tipc_media_addr *dest)
+static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
+                        struct udp_bearer *ub, struct udp_media_addr *src,
+                        struct udp_media_addr *dst)
 {
        int ttl, err = 0;
-       struct udp_bearer *ub;
-       struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
-       struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
        struct rtable *rt;
 
-       if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
-               err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
-               if (err)
-                       goto tx_error;
-       }
-
-       skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
-       ub = rcu_dereference_rtnl(b->media_ptr);
-       if (!ub) {
-               err = -ENODEV;
-               goto tx_error;
-       }
        if (dst->proto == htons(ETH_P_IP)) {
                struct flowi4 fl = {
                        .daddr = dst->ipv4.s_addr,
@@ -207,29 +206,178 @@ tx_error:
        return err;
 }
 
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+                            struct tipc_bearer *b,
+                            struct tipc_media_addr *addr)
+{
+       struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+       struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value;
+       struct udp_replicast *rcast;
+       struct udp_bearer *ub;
+       int err = 0;
+
+       if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+               err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+               if (err)
+                       goto out;
+       }
+
+       skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
+       ub = rcu_dereference_rtnl(b->media_ptr);
+       if (!ub) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       if (!addr->broadcast || list_empty(&ub->rcast.list))
+               return tipc_udp_xmit(net, skb, ub, src, dst);
+
+       /* Replicast, send an skb to each configured IP address */
+       list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
+               struct sk_buff *_skb;
+
+               _skb = pskb_copy(skb, GFP_ATOMIC);
+               if (!_skb) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
+               if (err) {
+                       kfree_skb(_skb);
+                       goto out;
+               }
+       }
+       err = 0;
+out:
+       kfree_skb(skb);
+       return err;
+}
+
+static bool tipc_udp_is_known_peer(struct tipc_bearer *b,
+                                  struct udp_media_addr *addr)
+{
+       struct udp_replicast *rcast, *tmp;
+       struct udp_bearer *ub;
+
+       ub = rcu_dereference_rtnl(b->media_ptr);
+       if (!ub) {
+               pr_err_ratelimited("UDP bearer instance not found\n");
+               return false;
+       }
+
+       list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+               if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr)))
+                       return true;
+       }
+
+       return false;
+}
+
+static int tipc_udp_rcast_add(struct tipc_bearer *b,
+                             struct udp_media_addr *addr)
+{
+       struct udp_replicast *rcast;
+       struct udp_bearer *ub;
+
+       ub = rcu_dereference_rtnl(b->media_ptr);
+       if (!ub)
+               return -ENODEV;
+
+       rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
+       if (!rcast)
+               return -ENOMEM;
+
+       memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
+
+       if (ntohs(addr->proto) == ETH_P_IP)
+               pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+       else if (ntohs(addr->proto) == ETH_P_IPV6)
+               pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
+#endif
+
+       list_add_rcu(&rcast->list, &ub->rcast.list);
+       return 0;
+}
+
+static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb)
+{
+       struct udp_media_addr src = {0};
+       struct udp_media_addr *dst;
+
+       dst = (struct udp_media_addr *)&b->bcast_addr.value;
+       if (tipc_udp_is_mcast_addr(dst))
+               return 0;
+
+       src.port = udp_hdr(skb)->source;
+
+       if (ip_hdr(skb)->version == 4) {
+               struct iphdr *iphdr = ip_hdr(skb);
+
+               src.proto = htons(ETH_P_IP);
+               src.ipv4.s_addr = iphdr->saddr;
+               if (ipv4_is_multicast(iphdr->daddr))
+                       return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (ip_hdr(skb)->version == 6) {
+               struct ipv6hdr *iphdr = ipv6_hdr(skb);
+
+               src.proto = htons(ETH_P_IPV6);
+               src.ipv6 = iphdr->saddr;
+               if (ipv6_addr_is_multicast(&iphdr->daddr))
+                       return 0;
+#endif
+       } else {
+               return 0;
+       }
+
+       if (likely(tipc_udp_is_known_peer(b, &src)))
+               return 0;
+
+       return tipc_udp_rcast_add(b, &src);
+}
+
 /* tipc_udp_recv - read data from bearer socket */
 static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 {
        struct udp_bearer *ub;
        struct tipc_bearer *b;
+       struct tipc_msg *hdr;
+       int err;
 
        ub = rcu_dereference_sk_user_data(sk);
        if (!ub) {
                pr_err_ratelimited("Failed to get UDP bearer reference");
-               kfree_skb(skb);
-               return 0;
+               goto out;
        }
-
        skb_pull(skb, sizeof(struct udphdr));
+       hdr = buf_msg(skb);
+
        rcu_read_lock();
        b = rcu_dereference_rtnl(ub->bearer);
+       if (!b)
+               goto rcu_out;
 
-       if (b) {
+       if (b && test_bit(0, &b->up)) {
                tipc_rcv(sock_net(sk), skb, b);
                rcu_read_unlock();
                return 0;
        }
+
+       if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
+               err = tipc_udp_rcast_disc(b, skb);
+               if (err)
+                       goto rcu_out;
+       }
+
+       tipc_rcv(sock_net(sk), skb, b);
        rcu_read_unlock();
+       return 0;
+
+rcu_out:
+       rcu_read_unlock();
+out:
        kfree_skb(skb);
        return 0;
 }
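
The replicast path above emulates multicast by handing a private copy of each outgoing buffer (pskb_copy()) to every address on the bearer's rcast list, and tipc_udp_rcast_disc() grows that list automatically from LINK_CONFIG traffic whenever the configured bcast address is not multicast. A userspace analogue of the send side, assuming a plain UDP socket and a hard-coded peer table, shows the shape of the loop:

#include <arpa/inet.h>
#include <stddef.h>
#include <sys/socket.h>

/* Send one private copy of buf to every known peer: the userspace shape
 * of the kernel's replicast loop above. Peer list and port are invented. */
static int replicast(int fd, const void *buf, size_t len)
{
	static const char *peers[] = { "10.0.0.2", "10.0.0.3" };
	struct sockaddr_in dst = { .sin_family = AF_INET,
				   .sin_port   = htons(6118) };
	size_t i;

	for (i = 0; i < sizeof(peers) / sizeof(peers[0]); i++) {
		if (inet_pton(AF_INET, peers[i], &dst.sin_addr) != 1)
			return -1;
		if (sendto(fd, buf, len, 0,
			   (struct sockaddr *)&dst, sizeof(dst)) < 0)
			return -1;	/* kernel variant also bails on first error */
	}
	return 0;
}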
@@ -241,15 +389,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
        struct sock *sk = ub->ubsock->sk;
 
        if (ntohs(remote->proto) == ETH_P_IP) {
-               if (!ipv4_is_multicast(remote->ipv4.s_addr))
-                       return 0;
                mreqn.imr_multiaddr = remote->ipv4;
                mreqn.imr_ifindex = ub->ifindex;
                err = ip_mc_join_group(sk, &mreqn);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               if (!ipv6_addr_is_multicast(&remote->ipv6))
-                       return 0;
                err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
                                                   &remote->ipv6);
 #endif
@@ -257,75 +401,234 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
        return err;
 }
 
-/**
- * parse_options - build local/remote addresses from configuration
- * @attrs:     netlink config data
- * @ub:                UDP bearer instance
- * @local:     local bearer IP address/port
- * @remote:    peer or multicast IP/port
- */
-static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
-                        struct udp_media_addr *local,
-                        struct udp_media_addr *remote)
+static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
+                                 struct udp_media_addr *addr, int nla_t)
 {
-       struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
-       struct sockaddr_storage sa_local, sa_remote;
+       if (ntohs(addr->proto) == ETH_P_IP) {
+               struct sockaddr_in ip4;
 
-       if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
-               goto err;
-       if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
-                            attrs[TIPC_NLA_BEARER_UDP_OPTS],
-                            tipc_nl_udp_policy))
-               goto err;
-       if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
-               nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
-                          sizeof(sa_local));
-               nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
-                          sizeof(sa_remote));
+               ip4.sin_family = AF_INET;
+               ip4.sin_port = addr->port;
+               ip4.sin_addr.s_addr = addr->ipv4.s_addr;
+               if (nla_put(skb, nla_t, sizeof(ip4), &ip4))
+                       return -EMSGSIZE;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       } else if (ntohs(addr->proto) == ETH_P_IPV6) {
+               struct sockaddr_in6 ip6;
+
+               ip6.sin6_family = AF_INET6;
+               ip6.sin6_port  = addr->port;
+               memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
+               if (nla_put(skb, nla_t, sizeof(ip6), &ip6))
+                       return -EMSGSIZE;
+#endif
+       }
+
+       return 0;
+}
+
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       u32 bid = cb->args[0];
+       u32 skip_cnt = cb->args[1];
+       u32 portid = NETLINK_CB(cb->skb).portid;
+       struct udp_replicast *rcast, *tmp;
+       struct tipc_bearer *b;
+       struct udp_bearer *ub;
+       void *hdr;
+       int err;
+       int i;
+
+       if (!bid && !skip_cnt) {
+               struct net *net = sock_net(skb->sk);
+               struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
+               struct nlattr **attrs;
+               char *bname;
+
+               err = tipc_nlmsg_parse(cb->nlh, &attrs);
+               if (err)
+                       return err;
+
+               if (!attrs[TIPC_NLA_BEARER])
+                       return -EINVAL;
+
+               err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
+                                      attrs[TIPC_NLA_BEARER],
+                                      tipc_nl_bearer_policy);
+               if (err)
+                       return err;
+
+               if (!battrs[TIPC_NLA_BEARER_NAME])
+                       return -EINVAL;
+
+               bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]);
+
+               rtnl_lock();
+               b = tipc_bearer_find(net, bname);
+               if (!b) {
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+               bid = b->identity;
        } else {
-err:
-               pr_err("Invalid UDP bearer configuration");
+               struct net *net = sock_net(skb->sk);
+               struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+               rtnl_lock();
+               b = rtnl_dereference(tn->bearer_list[bid]);
+               if (!b) {
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+       }
+
+       ub = rcu_dereference_rtnl(b->media_ptr);
+       if (!ub) {
+               rtnl_unlock();
                return -EINVAL;
        }
-       if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
-               struct sockaddr_in *ip4;
-
-               ip4 = (struct sockaddr_in *)&sa_local;
-               local->proto = htons(ETH_P_IP);
-               local->port = ip4->sin_port;
-               local->ipv4.s_addr = ip4->sin_addr.s_addr;
-
-               ip4 = (struct sockaddr_in *)&sa_remote;
-               remote->proto = htons(ETH_P_IP);
-               remote->port = ip4->sin_port;
-               remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+
+       i = 0;
+       list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+               if (i < skip_cnt)
+                       goto count;
+
+               hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq,
+                                 &tipc_genl_family, NLM_F_MULTI,
+                                 TIPC_NL_BEARER_GET);
+               if (!hdr)
+                       goto done;
+
+               err = __tipc_nl_add_udp_addr(skb, &rcast->addr,
+                                            TIPC_NLA_UDP_REMOTE);
+               if (err) {
+                       genlmsg_cancel(skb, hdr);
+                       goto done;
+               }
+               genlmsg_end(skb, hdr);
+count:
+               i++;
+       }
+done:
+       rtnl_unlock();
+       cb->args[0] = bid;
+       cb->args[1] = i;
+
+       return skb->len;
+}
+
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b)
+{
+       struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+       struct udp_media_addr *dst;
+       struct udp_bearer *ub;
+       struct nlattr *nest;
+
+       ub = rcu_dereference_rtnl(b->media_ptr);
+       if (!ub)
+               return -ENODEV;
+
+       nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+       if (!nest)
+               goto msg_full;
+
+       if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL))
+               goto msg_full;
+
+       dst = (struct udp_media_addr *)&b->bcast_addr.value;
+       if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE))
+               goto msg_full;
+
+       if (!list_empty(&ub->rcast.list)) {
+               if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP))
+                       goto msg_full;
+       }
+
+       nla_nest_end(msg->skb, nest);
+       return 0;
+msg_full:
+       nla_nest_cancel(msg->skb, nest);
+       return -EMSGSIZE;
+}
+
+/**
+ * tipc_parse_udp_addr - build udp media address from netlink data
+ * @nla:       netlink attribute containing a sockaddr storage aligned address
+ * @addr:      tipc media address to fill with address, port and protocol type
+ * @scope_id:  IPv6 scope id pointer; non-NULL means a scope id is required
+ */
+
+static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr,
+                              u32 *scope_id)
+{
+       struct sockaddr_storage sa;
+
+       nla_memcpy(&sa, nla, sizeof(sa));
+       if (sa.ss_family == AF_INET) {
+               struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa;
+
+               addr->proto = htons(ETH_P_IP);
+               addr->port = ip4->sin_port;
+               addr->ipv4.s_addr = ip4->sin_addr.s_addr;
                return 0;
 
 #if IS_ENABLED(CONFIG_IPV6)
-       } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
-               int atype;
-               struct sockaddr_in6 *ip6;
+       } else if (sa.ss_family == AF_INET6) {
+               struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa;
 
-               ip6 = (struct sockaddr_in6 *)&sa_local;
-               atype = ipv6_addr_type(&ip6->sin6_addr);
-               if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
-                       return -EINVAL;
+               addr->proto = htons(ETH_P_IPV6);
+               addr->port = ip6->sin6_port;
+               memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
+
+               /* Scope ID is only interesting for local addresses */
+               if (scope_id) {
+                       int atype;
 
-               local->proto = htons(ETH_P_IPV6);
-               local->port = ip6->sin6_port;
-               memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
-               ub->ifindex = ip6->sin6_scope_id;
+                       atype = ipv6_addr_type(&ip6->sin6_addr);
+                       if (__ipv6_addr_needs_scope_id(atype) &&
+                           !ip6->sin6_scope_id) {
+                               return -EINVAL;
+                       }
+
+                       *scope_id = ip6->sin6_scope_id ? : 0;
+               }
 
-               ip6 = (struct sockaddr_in6 *)&sa_remote;
-               remote->proto = htons(ETH_P_IPV6);
-               remote->port = ip6->sin6_port;
-               memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
                return 0;
 #endif
        }
        return -EADDRNOTAVAIL;
 }
 
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
+{
+       int err;
+       struct udp_media_addr addr = {0};
+       struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+       struct udp_media_addr *dst;
+
+       if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+               return -EINVAL;
+
+       if (!opts[TIPC_NLA_UDP_REMOTE])
+               return -EINVAL;
+
+       err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
+       if (err)
+               return err;
+
+       dst = (struct udp_media_addr *)&b->bcast_addr.value;
+       if (tipc_udp_is_mcast_addr(dst)) {
+               pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
+               return -EINVAL;
+       }
+
+       if (tipc_udp_is_known_peer(b, &addr))
+               return 0;
+
+       return tipc_udp_rcast_add(b, &addr);
+}
+
 /**
  * tipc_udp_enable - callback to create a new udp bearer instance
  * @net:       network namespace
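
tipc_udp_nl_dump_remoteip() above follows the stock netlink dump pattern: each dumpit invocation fills one skb, records how far it got in cb->args[] (bearer id and a skip count), and gets re-entered until it returns no data. Stripped of the netlink plumbing, the resume bookkeeping reduces to the following illustrative analogue, where BATCH stands in for "skb is full":

#include <stdio.h>

#define BATCH 2

/* Emit up to BATCH items per call, resuming at *skip: the shape of the
 * cb->args[1] bookkeeping above. Returns items emitted; 0 means done. */
static int dump_batch(const char *const *items, int n, int *skip)
{
	int i, emitted = 0;

	for (i = 0; i < n; i++) {
		if (i < *skip)
			continue;	/* already sent in a prior call */
		if (emitted == BATCH)
			break;		/* "skb" full: stop, remember position */
		printf("%s\n", items[i]);
		emitted++;
	}
	*skip = i;
	return emitted;
}

int main(void)
{
	const char *peers[] = { "10.0.0.2", "10.0.0.3", "10.0.0.4" };
	int skip = 0;

	while (dump_batch(peers, 3, &skip))
		;	/* netlink re-invokes the dumpit the same way */
	return 0;
}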
@@ -340,18 +643,38 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
 {
        int err = -EINVAL;
        struct udp_bearer *ub;
-       struct udp_media_addr *remote;
+       struct udp_media_addr remote = {0};
        struct udp_media_addr local = {0};
        struct udp_port_cfg udp_conf = {0};
        struct udp_tunnel_sock_cfg tuncfg = {NULL};
+       struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
 
        ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
        if (!ub)
                return -ENOMEM;
 
-       remote = (struct udp_media_addr *)&b->bcast_addr.value;
-       memset(remote, 0, sizeof(struct udp_media_addr));
-       err = parse_options(attrs, ub, &local, remote);
+       INIT_LIST_HEAD(&ub->rcast.list);
+
+       if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+               goto err;
+
+       if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+                            attrs[TIPC_NLA_BEARER_UDP_OPTS],
+                            tipc_nl_udp_policy))
+               goto err;
+
+       if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
+               pr_err("Invalid UDP bearer configuration");
+               err = -EINVAL;
+               goto err;
+       }
+
+       err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local,
+                                 &ub->ifindex);
+       if (err)
+               goto err;
+
+       err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
        if (err)
                goto err;
 
@@ -396,10 +719,22 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
        tuncfg.encap_destroy = NULL;
        setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
 
-       if (enable_mcast(ub, remote))
+       /*
+        * The bcast media address port is used for all peers and the IP
+        * is used if it's a multicast address.
+        */
+       memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
+       if (tipc_udp_is_mcast_addr(&remote))
+               err = enable_mcast(ub, &remote);
+       else
+               err = tipc_udp_rcast_add(b, &remote);
+       if (err)
                goto err;
+
        return 0;
 err:
+       if (ub->ubsock)
+               udp_tunnel_sock_release(ub->ubsock);
        kfree(ub);
        return err;
 }
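
The reworked error path in tipc_udp_enable() above also plugs a leak: on failure after the tunnel socket has been created, ub->ubsock is now released before ub is freed. The underlying idiom, one error label that releases only what was actually acquired, in a self-contained sketch (the bearer struct and FILE-based "socket" are stand-ins, not kernel types):

#include <stdio.h>
#include <stdlib.h>

struct bearer {
	FILE *sock;	/* stand-in for ub->ubsock */
};

/* One exit path that unwinds exactly the acquired resources, like the
 * fixed err: label above. */
static int bearer_enable(const char *cfg)
{
	struct bearer *ub;
	int err = -1;

	ub = calloc(1, sizeof(*ub));
	if (!ub)
		return -1;

	if (!cfg || !cfg[0])	/* config parse failed: sock not yet open */
		goto err;

	ub->sock = tmpfile();
	if (!ub->sock)
		goto err;

	return 0;	/* in the real code, ub is now owned by the bearer */
err:
	if (ub->sock)	/* release only what exists, like the patched path */
		fclose(ub->sock);
	free(ub);
	return err;
}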
@@ -408,6 +743,12 @@ err:
 static void cleanup_bearer(struct work_struct *work)
 {
        struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+       struct udp_replicast *rcast, *tmp;
+
+       list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+               list_del_rcu(&rcast->list);
+               kfree_rcu(rcast, rcu);
+       }
 
        if (ub->ubsock)
                udp_tunnel_sock_release(ub->ubsock);
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
new file mode 100644
index 0000000..281bbae
--- /dev/null
+++ b/net/tipc/udp_media.h
@@ -0,0 +1,46 @@
+/*
+ * net/tipc/udp_media.h: Include file for UDP bearer media
+ *
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+#ifndef _TIPC_UDP_MEDIA_H
+#define _TIPC_UDP_MEDIA_H
+
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif
+#endif
index f1dffe8..8309687 100644
@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
 {
        struct unix_sock *u = unix_sk(sk);
 
-       if (mutex_lock_interruptible(&u->readlock))
+       if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;
 
        sk->sk_peek_off = val;
-       mutex_unlock(&u->readlock);
+       mutex_unlock(&u->iolock);
 
        return 0;
 }
@@ -779,7 +779,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
-       mutex_init(&u->readlock); /* single task reading lock */
+       mutex_init(&u->iolock); /* single task reading lock */
+       mutex_init(&u->bindlock); /* single task binding lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -848,7 +849,7 @@ static int unix_autobind(struct socket *sock)
        int err;
        unsigned int retries = 0;
 
-       err = mutex_lock_interruptible(&u->readlock);
+       err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;
 
@@ -895,7 +896,7 @@ retry:
        spin_unlock(&unix_table_lock);
        err = 0;
 
-out:   mutex_unlock(&u->readlock);
+out:   mutex_unlock(&u->bindlock);
        return err;
 }
 
@@ -954,20 +955,32 @@ fail:
        return NULL;
 }
 
-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
-                     struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 {
-       int err;
+       struct dentry *dentry;
+       struct path path;
+       int err = 0;
+       /*
+        * Get the parent directory, calculate the hash for last
+        * component.
+        */
+       dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+       err = PTR_ERR(dentry);
+       if (IS_ERR(dentry))
+               return err;
 
-       err = security_path_mknod(path, dentry, mode, 0);
+       /*
+        * All right, let's create it.
+        */
+       err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
-               err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
                if (!err) {
-                       res->mnt = mntget(path->mnt);
+                       res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
                }
        }
-
+       done_path_create(&path, dentry);
        return err;
 }
 
@@ -978,12 +991,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct unix_sock *u = unix_sk(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
-       int err, name_err;
+       int err;
        unsigned int hash;
        struct unix_address *addr;
        struct hlist_head *list;
-       struct path path;
-       struct dentry *dentry;
 
        err = -EINVAL;
        if (sunaddr->sun_family != AF_UNIX)
@@ -999,34 +1010,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                goto out;
        addr_len = err;
 
-       name_err = 0;
-       dentry = NULL;
-       if (sun_path[0]) {
-               /* Get the parent directory, calculate the hash for last
-                * component.
-                */
-               dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
-               if (IS_ERR(dentry)) {
-                       /* delay report until after 'already bound' check */
-                       name_err = PTR_ERR(dentry);
-                       dentry = NULL;
-               }
-       }
-
-       err = mutex_lock_interruptible(&u->readlock);
+       err = mutex_lock_interruptible(&u->bindlock);
        if (err)
-               goto out_path;
+               goto out;
 
        err = -EINVAL;
        if (u->addr)
                goto out_up;
 
-       if (name_err) {
-               err = name_err == -EEXIST ? -EADDRINUSE : name_err;
-               goto out_up;
-       }
-
        err = -ENOMEM;
        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
        if (!addr)
@@ -1037,11 +1028,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        addr->hash = hash ^ sk->sk_type;
        atomic_set(&addr->refcnt, 1);
 
-       if (dentry) {
-               struct path u_path;
+       if (sun_path[0]) {
+               struct path path;
                umode_t mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current_umask());
-               err = unix_mknod(dentry, &path, mode, &u_path);
+               err = unix_mknod(sun_path, mode, &path);
                if (err) {
                        if (err == -EEXIST)
                                err = -EADDRINUSE;
@@ -1049,9 +1040,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
-               hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+               hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                spin_lock(&unix_table_lock);
-               u->path = u_path;
+               u->path = path;
                list = &unix_socket_table[hash];
        } else {
                spin_lock(&unix_table_lock);
@@ -1073,11 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 out_unlock:
        spin_unlock(&unix_table_lock);
 out_up:
-       mutex_unlock(&u->readlock);
-out_path:
-       if (dentry)
-               done_path_create(&path, dentry);
-
+       mutex_unlock(&u->bindlock);
 out:
        return err;
 }
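
Across this file the old u->readlock is split in two: iolock for receive/splice/peek-offset state and bindlock for address/path setup, and unix_mknod() now does its kern_path_create() work before any socket lock is taken. The apparent motivation (an inference from the code movement, not stated in the diff) is to keep bind's filesystem work from nesting inside the lock the data path holds while it may fault or splice. A pthread sketch of the resulting lock layout, with names mirroring the patch:

#include <pthread.h>

/* Two independent locks, as after the patch: neither path serializes
 * against the other anymore. */
struct usock {
	pthread_mutex_t iolock;		/* recv/splice/peek state */
	pthread_mutex_t bindlock;	/* u->addr / u->path setup */
	int bound;
};

static void usock_init(struct usock *u)
{
	pthread_mutex_init(&u->iolock, NULL);
	pthread_mutex_init(&u->bindlock, NULL);
	u->bound = 0;
}

static int usock_bind(struct usock *u)
{
	pthread_mutex_lock(&u->bindlock);	/* was: the shared readlock */
	if (u->bound) {
		pthread_mutex_unlock(&u->bindlock);
		return -1;	/* -EINVAL: already bound */
	}
	/* filesystem work (mknod) happens off the I/O lock entirely */
	u->bound = 1;
	pthread_mutex_unlock(&u->bindlock);
	return 0;
}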
@@ -1969,17 +1956,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
        if (false) {
 alloc_skb:
                unix_state_unlock(other);
-               mutex_unlock(&unix_sk(other)->readlock);
+               mutex_unlock(&unix_sk(other)->iolock);
                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
                                              &err, 0);
                if (!newskb)
                        goto err;
        }
 
-       /* we must acquire readlock as we modify already present
+       /* we must acquire iolock as we modify already present
         * skbs in the sk_receive_queue and mess with skb->len
         */
-       err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+       err = mutex_lock_interruptible(&unix_sk(other)->iolock);
        if (err) {
                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
                goto err;
@@ -2046,7 +2033,7 @@ alloc_skb:
        }
 
        unix_state_unlock(other);
-       mutex_unlock(&unix_sk(other)->readlock);
+       mutex_unlock(&unix_sk(other)->iolock);
 
        other->sk_data_ready(other);
        scm_destroy(&scm);
@@ -2055,7 +2042,7 @@ alloc_skb:
 err_state_unlock:
        unix_state_unlock(other);
 err_unlock:
-       mutex_unlock(&unix_sk(other)->readlock);
+       mutex_unlock(&unix_sk(other)->iolock);
 err:
        kfree_skb(newskb);
        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2123,7 +2110,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
        do {
-               mutex_lock(&u->readlock);
+               mutex_lock(&u->iolock);
 
                skip = sk_peek_offset(sk, flags);
                skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
@@ -2131,14 +2118,14 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
                if (skb)
                        break;
 
-               mutex_unlock(&u->readlock);
+               mutex_unlock(&u->iolock);
 
                if (err != -EAGAIN)
                        break;
        } while (timeo &&
                 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
 
-       if (!skb) { /* implies readlock unlocked */
+       if (!skb) { /* implies iolock unlocked */
                unix_state_lock(sk);
                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
@@ -2203,7 +2190,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 
 out_free:
        skb_free_datagram(sk, skb);
-       mutex_unlock(&u->readlock);
+       mutex_unlock(&u->iolock);
 out:
        return err;
 }
@@ -2298,7 +2285,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */
-       mutex_lock(&u->readlock);
+       mutex_lock(&u->iolock);
 
        if (flags & MSG_PEEK)
                skip = sk_peek_offset(sk, flags);
@@ -2340,7 +2327,7 @@ again:
                                break;
                        }
 
-                       mutex_unlock(&u->readlock);
+                       mutex_unlock(&u->iolock);
 
                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len);
@@ -2351,7 +2338,7 @@ again:
                                goto out;
                        }
 
-                       mutex_lock(&u->readlock);
+                       mutex_lock(&u->iolock);
                        goto redo;
 unlock:
                        unix_state_unlock(sk);
@@ -2454,7 +2441,7 @@ unlock:
                }
        } while (size);
 
-       mutex_unlock(&u->readlock);
+       mutex_unlock(&u->iolock);
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
@@ -2495,9 +2482,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
        int ret;
        struct unix_sock *u = unix_sk(sk);
 
-       mutex_unlock(&u->readlock);
+       mutex_unlock(&u->iolock);
        ret = splice_to_pipe(pipe, spd);
-       mutex_lock(&u->readlock);
+       mutex_lock(&u->iolock);
 
        return ret;
 }
index dbb2738..6250b1c 100644
@@ -958,29 +958,8 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
                        return private(dev, iwr, cmd, info, handler);
        }
        /* Old driver API : call driver ioctl handler */
-       if (dev->netdev_ops->ndo_do_ioctl) {
-#ifdef CONFIG_COMPAT
-               if (info->flags & IW_REQUEST_FLAG_COMPAT) {
-                       int ret = 0;
-                       struct iwreq iwr_lcl;
-                       struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
-
-                       memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
-                       iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
-                       iwr_lcl.u.data.length = iwp_compat->length;
-                       iwr_lcl.u.data.flags = iwp_compat->flags;
-
-                       ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
-
-                       iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
-                       iwp_compat->length = iwr_lcl.u.data.length;
-                       iwp_compat->flags = iwr_lcl.u.data.flags;
-
-                       return ret;
-               } else
-#endif
-                       return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
-       }
+       if (dev->netdev_ops->ndo_do_ioctl)
+               return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
        return -EOPNOTSUPP;
 }
 
index a750f33..f83b74d 100644
@@ -1500,12 +1500,8 @@ out_fac_release:
                        goto out_dtefac_release;
                if (dtefacs.calling_len > X25_MAX_AE_LEN)
                        goto out_dtefac_release;
-               if (dtefacs.calling_ae == NULL)
-                       goto out_dtefac_release;
                if (dtefacs.called_len > X25_MAX_AE_LEN)
                        goto out_dtefac_release;
-               if (dtefacs.called_ae == NULL)
-                       goto out_dtefac_release;
                x25->dte_facilities = dtefacs;
                rc = 0;
 out_dtefac_release:
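
The checks deleted above could never fire: calling_ae and called_ae are arrays embedded in the facilities struct, and an embedded array decays to a pointer into the struct itself, which is never NULL (most compilers warn about such comparisons). A standalone illustration, with an invented struct of the same shape:

#include <stdio.h>

/* Invented lookalike of the x25 facilities struct: the array member
 * cannot compare equal to NULL, so the removed checks were dead code. */
struct dte_facilities {
	unsigned char calling_ae[40];	/* length is illustrative */
};

int main(void)
{
	struct dte_facilities f;

	if (f.calling_ae == NULL)	/* always false; gcc: -Waddress */
		printf("unreachable\n");
	else
		printf("embedded arrays always compare non-NULL\n");
	return 0;
}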
index 250e567..44ac85f 100644
@@ -17,7 +17,7 @@
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
 #include <net/xfrm.h>
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP)
 #include <net/esp.h>
 #endif
 
index 1c4ad47..6e3f025 100644
@@ -207,15 +207,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
        family = XFRM_SPI_SKB_CB(skb)->family;
 
        /* if tunnel is present override skb->mark value with tunnel i_key */
-       if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
-               switch (family) {
-               case AF_INET:
+       switch (family) {
+       case AF_INET:
+               if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
                        mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
-                       break;
-               case AF_INET6:
+               break;
+       case AF_INET6:
+               if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
                        mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
-                       break;
-               }
+               break;
        }
 
        /* Allocate new secpath or COW existing one. */
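
The reordering above matters because the tunnel field holds its IPv4 and IPv6 tunnel pointers in a union, so the old "tunnel present?" test through tunnel.ip4 also passed for IPv6 tunnels and then read the key through the wrong type. A standalone demonstration of that aliasing hazard (struct names invented):

#include <stdio.h>

struct ip4_parms { unsigned int i_key; };
struct ip6_parms { unsigned int i_key; };

/* Two pointers overlaid in a union, like the tunnel field: setting the
 * IPv6 member makes the IPv4 member compare non-NULL as well. */
union tun {
	struct ip4_parms *ip4;
	struct ip6_parms *ip6;
};

int main(void)
{
	struct ip6_parms p6 = { .i_key = 42 };
	union tun t = { .ip6 = &p6 };
	int family = 6;

	/* Old-style check: "tunnel present?" via ip4 is true here too */
	if (t.ip4)
		printf("ip4 looks set even though this is an IPv6 tunnel\n");

	/* Fixed shape: dispatch on family first, then read the right member */
	switch (family) {
	case 4: if (t.ip4) printf("key %u\n", t.ip4->i_key); break;
	case 6: if (t.ip6) printf("key %u\n", t.ip6->i_key); break;
	}
	return 0;
}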
index b5e665b..fd69866 100644
@@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
                                                __read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static __read_mostly seqcount_t xfrm_policy_hash_generation;
 
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
@@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir);
 
+static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
+{
+       return atomic_inc_not_zero(&policy->refcnt);
+}
+
 static inline bool
 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
 {
@@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
        __get_hash_thresh(net, family, dir, &dbits, &sbits);
        hash = __sel_hash(sel, family, hmask, dbits, sbits);
 
-       return (hash == hmask + 1 ?
-               &net->xfrm.policy_inexact[dir] :
-               net->xfrm.policy_bydst[dir].table + hash);
+       if (hash == hmask + 1)
+               return &net->xfrm.policy_inexact[dir];
+
+       return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+                    lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static struct hlist_head *policy_hash_direct(struct net *net,
@@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net,
        __get_hash_thresh(net, family, dir, &dbits, &sbits);
        hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
 
-       return net->xfrm.policy_bydst[dir].table + hash;
+       return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+                    lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static void xfrm_dst_hash_transfer(struct net *net,
@@ -426,14 +435,14 @@ redo:
                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
                                pol->family, nhashmask, dbits, sbits);
                if (!entry0) {
-                       hlist_del(&pol->bydst);
-                       hlist_add_head(&pol->bydst, ndsttable+h);
+                       hlist_del_rcu(&pol->bydst);
+                       hlist_add_head_rcu(&pol->bydst, ndsttable + h);
                        h0 = h;
                } else {
                        if (h != h0)
                                continue;
-                       hlist_del(&pol->bydst);
-                       hlist_add_behind(&pol->bydst, entry0);
+                       hlist_del_rcu(&pol->bydst);
+                       hlist_add_behind_rcu(&pol->bydst, entry0);
                }
                entry0 = &pol->bydst;
        }
@@ -468,22 +477,29 @@ static void xfrm_bydst_resize(struct net *net, int dir)
        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-       struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
        struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+       struct hlist_head *odst;
        int i;
 
        if (!ndst)
                return;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+       write_seqcount_begin(&xfrm_policy_hash_generation);
+
+       odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+                               lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 
        for (i = hmask; i >= 0; i--)
                xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
 
-       net->xfrm.policy_bydst[dir].table = ndst;
+       rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
        net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       write_seqcount_end(&xfrm_policy_hash_generation);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+       synchronize_rcu();
 
        xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
        if (!nidx)
                return;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
        for (i = hmask; i >= 0; i--)
                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
@@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
        net->xfrm.policy_byidx = nidx;
        net->xfrm.policy_idx_hmask = nhashmask;
 
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)
 
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 {
-       read_lock_bh(&net->xfrm.xfrm_policy_lock);
        si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
        si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
        si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
@@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
        si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
        si->spdhcnt = net->xfrm.policy_idx_hmask;
        si->spdhmcnt = xfrm_policy_hashmax;
-       read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_spd_getinfo);
 
@@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
                rbits6 = net->xfrm.policy_hthresh.rbits6;
        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
        /* reset the bydst and inexact table in all directions */
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
@@ -626,6 +643,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
        /* re-insert all policies by order of creation */
        list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+               if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+                       /* skip socket policies */
+                       continue;
+               }
                newpos = NULL;
                chain = policy_hash_bysel(net, &policy->selector,
                                          policy->family,
@@ -642,7 +663,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
                        hlist_add_head(&policy->bydst, chain);
        }
 
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        mutex_unlock(&hash_resize_mutex);
 }
@@ -753,7 +774,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        struct hlist_head *chain;
        struct hlist_node *newpos;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
        delpol = NULL;
        newpos = NULL;
@@ -764,7 +785,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl) {
-                               write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+                               spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                return -EEXIST;
                        }
                        delpol = pol;
@@ -800,7 +821,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        if (delpol)
                xfrm_policy_kill(delpol);
@@ -820,7 +841,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
        struct hlist_head *chain;
 
        *err = 0;
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, sel, sel->family, dir);
        ret = NULL;
        hlist_for_each_entry(pol, chain, bydst) {
@@ -833,7 +854,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
                                *err = security_xfrm_policy_delete(
                                                                pol->security);
                                if (*err) {
-                                       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+                                       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                        return pol;
                                }
                                __xfrm_policy_unlink(pol, dir);
@@ -842,7 +863,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
                        break;
                }
        }
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        if (ret && delete)
                xfrm_policy_kill(ret);
@@ -861,7 +882,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
                return NULL;
 
        *err = 0;
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = net->xfrm.policy_byidx + idx_hash(net, id);
        ret = NULL;
        hlist_for_each_entry(pol, chain, byidx) {
@@ -872,7 +893,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
                                *err = security_xfrm_policy_delete(
                                                                pol->security);
                                if (*err) {
-                                       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+                                       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                        return pol;
                                }
                                __xfrm_policy_unlink(pol, dir);
@@ -881,7 +902,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
                        break;
                }
        }
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        if (ret && delete)
                xfrm_policy_kill(ret);
@@ -939,7 +960,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 {
        int dir, err = 0, cnt = 0;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
        err = xfrm_policy_flush_secctx_check(net, type, task_valid);
        if (err)
@@ -955,14 +976,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
                        if (pol->type != type)
                                continue;
                        __xfrm_policy_unlink(pol, dir);
-                       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+                       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                        cnt++;
 
                        xfrm_audit_policy_delete(pol, 1, task_valid);
 
                        xfrm_policy_kill(pol);
 
-                       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+                       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                        goto again1;
                }
 
@@ -974,13 +995,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
                                if (pol->type != type)
                                        continue;
                                __xfrm_policy_unlink(pol, dir);
-                               write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+                               spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                cnt++;
 
                                xfrm_audit_policy_delete(pol, 1, task_valid);
                                xfrm_policy_kill(pol);
 
-                               write_lock_bh(&net->xfrm.xfrm_policy_lock);
+                               spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                                goto again2;
                        }
                }
@@ -989,7 +1010,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
        if (!cnt)
                err = -ESRCH;
 out:
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1009,7 +1030,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
        if (list_empty(&walk->walk.all) && walk->seq != 0)
                return 0;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        if (list_empty(&walk->walk.all))
                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
        else
@@ -1037,7 +1058,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
        }
        list_del_init(&walk->walk.all);
 out:
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return error;
 }
 EXPORT_SYMBOL(xfrm_policy_walk);
@@ -1056,9 +1077,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
        if (list_empty(&walk->walk.all))
                return;
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
        list_del(&walk->walk.all);
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_policy_walk_done);
 
@@ -1096,17 +1117,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        struct xfrm_policy *pol, *ret;
        const xfrm_address_t *daddr, *saddr;
        struct hlist_head *chain;
-       u32 priority = ~0U;
+       unsigned int sequence;
+       u32 priority;
 
        daddr = xfrm_flowi_daddr(fl, family);
        saddr = xfrm_flowi_saddr(fl, family);
        if (unlikely(!daddr || !saddr))
                return NULL;
 
-       read_lock_bh(&net->xfrm.xfrm_policy_lock);
-       chain = policy_hash_direct(net, daddr, saddr, family, dir);
+       rcu_read_lock();
+ retry:
+       do {
+               sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+               chain = policy_hash_direct(net, daddr, saddr, family, dir);
+       } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+
+       priority = ~0U;
        ret = NULL;
-       hlist_for_each_entry(pol, chain, bydst) {
+       hlist_for_each_entry_rcu(pol, chain, bydst) {
                err = xfrm_policy_match(pol, fl, type, family, dir);
                if (err) {
                        if (err == -ESRCH)
@@ -1122,7 +1150,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                }
        }
        chain = &net->xfrm.policy_inexact[dir];
-       hlist_for_each_entry(pol, chain, bydst) {
+       hlist_for_each_entry_rcu(pol, chain, bydst) {
                if ((pol->priority >= priority) && ret)
                        break;
 
@@ -1140,9 +1168,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                }
        }
 
-       xfrm_pol_hold(ret);
+       if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+               goto retry;
+
+       if (ret && !xfrm_pol_hold_rcu(ret))
+               goto retry;
 fail:
-       read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       rcu_read_unlock();
 
        return ret;
 }
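The direct lookup is now completely lockless on the read side. Three mechanisms cooperate: a seqcount read section makes the hash-table pointer stable against a concurrent resize, the chain walk itself is RCU-protected, and the final reference is taken with an inc-not-zero helper so that a policy already being freed forces a retry instead of a use-after-free. The skeleton, condensed from the hunk above (kernel context assumed):

	rcu_read_lock();
retry:
	do {
		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
		chain = policy_hash_direct(net, daddr, saddr, family, dir);
	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));

	hlist_for_each_entry_rcu(pol, chain, bydst) {
		/* selector match and priority bookkeeping as above */
	}

	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
		goto retry;			/* table was resized under us */
	if (ret && !xfrm_pol_hold_rcu(ret))
		goto retry;			/* policy is already dying */
	rcu_read_unlock();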
@@ -1219,10 +1251,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                                                 const struct flowi *fl)
 {
        struct xfrm_policy *pol;
-       struct net *net = sock_net(sk);
 
        rcu_read_lock();
-       read_lock_bh(&net->xfrm.xfrm_policy_lock);
+ again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
                bool match = xfrm_selector_match(&pol->selector, fl,
@@ -1237,8 +1268,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                        err = security_xfrm_policy_lookup(pol->security,
                                                      fl->flowi_secid,
                                                      policy_to_flow_dir(dir));
-                       if (!err)
-                               xfrm_pol_hold(pol);
+                       if (!err && !xfrm_pol_hold_rcu(pol))
+                               goto again;
                        else if (err == -ESRCH)
                                pol = NULL;
                        else
@@ -1247,7 +1278,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                        pol = NULL;
        }
 out:
-       read_unlock_bh(&net->xfrm.xfrm_policy_lock);
        rcu_read_unlock();
        return pol;
 }
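xfrm_pol_hold_rcu() itself is not part of this excerpt; judging from the state-side helper that appears later in this diff, it is presumably the standard inc-not-zero idiom:

static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *pol)
{
	/* fails once the last reference is gone; caller must retry */
	return atomic_inc_not_zero(&pol->refcnt);
}

Under RCU a reader may still see an object whose refcount has already dropped to zero; atomic_inc_not_zero() turns that race into a visible failure, which both lookup paths above translate into a retry.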
@@ -1271,7 +1301,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 
        /* Socket policies are not hashed. */
        if (!hlist_unhashed(&pol->bydst)) {
-               hlist_del(&pol->bydst);
+               hlist_del_rcu(&pol->bydst);
                hlist_del(&pol->byidx);
        }
 
@@ -1295,9 +1325,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 {
        struct net *net = xp_net(pol);
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        if (pol) {
                xfrm_policy_kill(pol);
                return 0;
@@ -1316,7 +1346,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
                return -EINVAL;
 #endif
 
-       write_lock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        old_pol = rcu_dereference_protected(sk->sk_policy[dir],
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
        if (pol) {
@@ -1334,7 +1364,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
                 */
                xfrm_sk_policy_unlink(old_pol, dir);
        }
-       write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        if (old_pol) {
                xfrm_policy_kill(old_pol);
@@ -1364,9 +1394,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
                newp->type = old->type;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
-               write_lock_bh(&net->xfrm.xfrm_policy_lock);
+               spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_sk_policy_link(newp, dir);
-               write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+               spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
@@ -3048,7 +3078,7 @@ static int __net_init xfrm_net_init(struct net *net)
 
        /* Initialize the per-net locks here */
        spin_lock_init(&net->xfrm.xfrm_state_lock);
-       rwlock_init(&net->xfrm.xfrm_policy_lock);
+       spin_lock_init(&net->xfrm.xfrm_policy_lock);
        mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
        return 0;
@@ -3082,6 +3112,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 void __init xfrm_init(void)
 {
        register_pernet_subsys(&xfrm_net_ops);
+       seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
 }
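The policy-side generation counter is initialized at boot here; a static initializer of the same shape as the state-side counter later in this diff would work equally well:

static __read_mostly seqcount_t xfrm_policy_hash_generation =
	SEQCNT_ZERO(xfrm_policy_hash_generation);

The only requirement is that the seqcount is initialized before the first resize or lookup touches it.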
 
@@ -3179,7 +3210,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
        struct hlist_head *chain;
        u32 priority = ~0U;
 
-       read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
+       spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
        hlist_for_each_entry(pol, chain, bydst) {
                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
@@ -3203,7 +3234,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
 
        xfrm_pol_hold(ret);
 
-       read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        return ret;
 }
index 4fd725a..cdc2e2e 100644 (file)
@@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
                x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
-static struct xfrm_replay xfrm_replay_legacy = {
+static const struct xfrm_replay xfrm_replay_legacy = {
        .advance        = xfrm_replay_advance,
        .check          = xfrm_replay_check,
        .recheck        = xfrm_replay_check,
@@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
        .overflow       = xfrm_replay_overflow,
 };
 
-static struct xfrm_replay xfrm_replay_bmp = {
+static const struct xfrm_replay xfrm_replay_bmp = {
        .advance        = xfrm_replay_advance_bmp,
        .check          = xfrm_replay_check_bmp,
        .recheck        = xfrm_replay_check_bmp,
@@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
        .overflow       = xfrm_replay_overflow_bmp,
 };
 
-static struct xfrm_replay xfrm_replay_esn = {
+static const struct xfrm_replay xfrm_replay_esn = {
        .advance        = xfrm_replay_advance_esn,
        .check          = xfrm_replay_check_esn,
        .recheck        = xfrm_replay_recheck_esn,
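Constifying the replay ops tables is a small but worthwhile hardening: every member is assigned exactly once at build time, so the compiler can place the structures in .rodata, and an accidental assignment through the table becomes a build error. The idiom in isolation, as a generic example:

struct ops {
	int (*check)(int v);
};

static int check_nonneg(int v)
{
	return v >= 0;
}

/* const: eligible for .rodata; writes through example_ops fail to compile */
static const struct ops example_ops = {
	.check = check_nonneg,
};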
index 9895a8c..ba8bf51 100644 (file)
 
 #include "xfrm_hash.h"
 
+#define xfrm_state_deref_prot(table, net) \
+       rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
 /* Each xfrm_state may be linked to two tables:
 
   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
  */
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+static HLIST_HEAD(xfrm_state_gc_list);
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+       return atomic_inc_not_zero(&x->refcnt);
+}
 
 static inline unsigned int xfrm_dst_hash(struct net *net,
                                         const xfrm_address_t *daddr,
@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
                h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.reqid, x->props.family,
                                    nhashmask);
-               hlist_add_head(&x->bydst, ndsttable+h);
+               hlist_add_head_rcu(&x->bydst, ndsttable + h);
 
                h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
                                    x->props.family,
                                    nhashmask);
-               hlist_add_head(&x->bysrc, nsrctable+h);
+               hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 
                if (x->id.spi) {
                        h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
                                            x->id.proto, x->props.family,
                                            nhashmask);
-                       hlist_add_head(&x->byspi, nspitable+h);
+                       hlist_add_head_rcu(&x->byspi, nspitable + h);
                }
        }
 }
@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work)
        }
 
        spin_lock_bh(&net->xfrm.xfrm_state_lock);
+       write_seqcount_begin(&xfrm_state_hash_generation);
 
        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+       odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
        for (i = net->xfrm.state_hmask; i >= 0; i--)
-               xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
-                                  nhashmask);
+               xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 
-       odst = net->xfrm.state_bydst;
-       osrc = net->xfrm.state_bysrc;
-       ospi = net->xfrm.state_byspi;
+       osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+       ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
        ohashmask = net->xfrm.state_hmask;
 
-       net->xfrm.state_bydst = ndst;
-       net->xfrm.state_bysrc = nsrc;
-       net->xfrm.state_byspi = nspi;
+       rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+       rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+       rcu_assign_pointer(net->xfrm.state_byspi, nspi);
        net->xfrm.state_hmask = nhashmask;
 
+       write_seqcount_end(&xfrm_state_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
        osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+       synchronize_rcu();
+
        xfrm_hash_free(odst, osize);
        xfrm_hash_free(osrc, osize);
        xfrm_hash_free(ospi, osize);
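The resize now follows the canonical RCU publish-then-retire ordering: entries are rehashed into the new tables with _rcu list primitives, the table pointers are published with rcu_assign_pointer() inside the seqcount write section, and the old tables are freed only after a grace period, so no reader can still be walking them. Condensed from the hunk above (kernel context assumed):

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	write_seqcount_begin(&xfrm_state_hash_generation);

	/* transfer entries with hlist_add_head_rcu() ... */

	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
	/* ... state_bysrc and state_byspi likewise ... */

	write_seqcount_end(&xfrm_state_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	synchronize_rcu();		/* readers on the old tables drain */
	xfrm_hash_free(odst, osize);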
@@ -355,15 +373,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *work)
 {
-       struct net *net = container_of(work, struct net, xfrm.state_gc_work);
        struct xfrm_state *x;
        struct hlist_node *tmp;
        struct hlist_head gc_list;
 
        spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+       hlist_move_list(&xfrm_state_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
 
+       synchronize_rcu();
+
        hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
                xfrm_state_gc_destroy(x);
 }
@@ -500,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
-       struct net *net = xs_net(x);
-
        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
        spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+       hlist_add_head(&x->gclist, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
-       schedule_work(&net->xfrm.state_gc_work);
+       schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -520,10 +537,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&net->xfrm.xfrm_state_lock);
                list_del(&x->km.all);
-               hlist_del(&x->bydst);
-               hlist_del(&x->bysrc);
+               hlist_del_rcu(&x->bydst);
+               hlist_del_rcu(&x->bysrc);
                if (x->id.spi)
-                       hlist_del(&x->byspi);
+                       hlist_del_rcu(&x->byspi);
                net->xfrm.state_num--;
                spin_unlock(&net->xfrm.xfrm_state_lock);
 
@@ -659,7 +676,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
        unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
        struct xfrm_state *x;
 
-       hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto ||
@@ -668,7 +685,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 
                if ((mark & x->mark.m) != x->mark.v)
                        continue;
-               xfrm_state_hold(x);
+               if (!xfrm_state_hold_rcu(x))
+                       continue;
                return x;
        }
 
@@ -683,7 +701,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
        unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
        struct xfrm_state *x;
 
-       hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto ||
                    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -692,7 +710,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 
                if ((mark & x->mark.m) != x->mark.v)
                        continue;
-               xfrm_state_hold(x);
+               if (!xfrm_state_hold_rcu(x))
+                       continue;
                return x;
        }
 
@@ -775,13 +794,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
        struct xfrm_state *best = NULL;
        u32 mark = pol->mark.v & pol->mark.m;
        unsigned short encap_family = tmpl->encap_family;
+       unsigned int sequence;
        struct km_event c;
 
        to_put = NULL;
 
-       spin_lock_bh(&net->xfrm.xfrm_state_lock);
+       sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+       rcu_read_lock();
        h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-       hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
@@ -797,7 +819,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
                goto found;
 
        h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-       hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+       hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
                if (x->props.family == encap_family &&
                    x->props.reqid == tmpl->reqid &&
                    (mark & x->mark.m) == x->mark.v &&
@@ -850,19 +872,21 @@ found:
                }
 
                if (km_query(x, tmpl, pol) == 0) {
+                       spin_lock_bh(&net->xfrm.xfrm_state_lock);
                        x->km.state = XFRM_STATE_ACQ;
                        list_add(&x->km.all, &net->xfrm.state_all);
-                       hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+                       hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                        h = xfrm_src_hash(net, daddr, saddr, encap_family);
-                       hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+                       hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
-                               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+                               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                        }
                        x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
                        tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                        net->xfrm.state_num++;
                        xfrm_hash_grow_check(net, x->bydst.next != NULL);
+                       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        to_put = x;
@@ -871,13 +895,26 @@ found:
                }
        }
 out:
-       if (x)
-               xfrm_state_hold(x);
-       else
+       if (x) {
+               if (!xfrm_state_hold_rcu(x)) {
+                       *err = -EAGAIN;
+                       x = NULL;
+               }
+       } else {
                *err = acquire_in_progress ? -EAGAIN : error;
-       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+       }
+       rcu_read_unlock();
        if (to_put)
                xfrm_state_put(to_put);
+
+       if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+               *err = -EAGAIN;
+               if (x) {
+                       xfrm_state_put(x);
+                       x = NULL;
+               }
+       }
+
        return x;
 }
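Unlike the policy lookup, xfrm_state_find() cannot simply loop internally: by the time a stale hash generation is detected it may already have created and inserted a larval ACQ state. A detected resize is therefore reported as -EAGAIN with any reference dropped, leaving the decision to the caller. A purely hypothetical caller-side view, with the loop for illustration only (the signature matches the function being patched):

	do {
		x = xfrm_state_find(daddr, saddr, fl, tmpl, pol, &err, family);
	} while (!x && err == -EAGAIN);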
 
@@ -945,16 +982,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
        h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
                          x->props.reqid, x->props.family);
-       hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+       hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 
        h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
-       hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+       hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
        if (x->id.spi) {
                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
 
-               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
        }
 
        tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1063,9 +1100,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
                xfrm_state_hold(x);
                tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
                list_add(&x->km.all, &net->xfrm.state_all);
-               hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+               hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
                h = xfrm_src_hash(net, daddr, saddr, family);
-               hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+               hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
                net->xfrm.state_num++;
 
@@ -1581,7 +1618,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
        if (x->id.spi) {
                spin_lock_bh(&net->xfrm.xfrm_state_lock);
                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-               hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+               hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
                err = 0;
@@ -2099,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net)
 
        net->xfrm.state_num = 0;
        INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
-       INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
-       INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
        spin_lock_init(&net->xfrm.xfrm_state_lock);
        return 0;
 
@@ -2118,7 +2153,7 @@ void xfrm_state_fini(struct net *net)
 
        flush_work(&net->xfrm.state_hash_work);
        xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
-       flush_work(&net->xfrm.state_gc_work);
+       flush_work(&xfrm_state_gc_work);
 
        WARN_ON(!list_empty(&net->xfrm.state_all));
 
index 05a6e3d..35a7e79 100644 (file)
@@ -17,13 +17,13 @@ static struct ctl_table xfrm_table[] = {
                .procname       = "xfrm_aevent_etime",
                .maxlen         = sizeof(u32),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_douintvec
        },
        {
                .procname       = "xfrm_aevent_rseqth",
                .maxlen         = sizeof(u32),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_douintvec
        },
        {
                .procname       = "xfrm_larval_drop",
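xfrm_aevent_etime and xfrm_aevent_rseqth are u32 values; proc_dointvec parses them as signed int, which is simply the wrong type here, while proc_douintvec accepts the full unsigned 32-bit range and rejects negative input. The general shape of an unsigned sysctl entry, with example_value as a hypothetical u32 (the real xfrm entries get their .data pointers wired up per-netns elsewhere):

static u32 example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "example_u32",
		.data		= &example_value,
		.maxlen		= sizeof(u32),
		.mode		= 0644,
		.proc_handler	= proc_douintvec,
	},
	{ }
};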
index d516845..cb65d91 100644 (file)
@@ -896,7 +896,8 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
        struct sock *sk = cb->skb->sk;
        struct net *net = sock_net(sk);
 
-       xfrm_state_walk_done(walk, net);
+       if (cb->args[0])
+               xfrm_state_walk_done(walk, net);
        return 0;
 }
 
@@ -921,8 +922,6 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
                u8 proto = 0;
                int err;
 
-               cb->args[0] = 1;
-
                err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
                                  xfrma_policy);
                if (err < 0)
@@ -939,6 +938,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
                        proto = nla_get_u8(attrs[XFRMA_PROTO]);
 
                xfrm_state_walk_init(walk, proto, filter);
+               cb->args[0] = 1;
        }
 
        (void) xfrm_state_walk(net, walk, dump_one_state, &info);
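The two xfrm_dump_sa hunks above close an ordering bug: cb->args[0] was set before nlmsg_parse(), so a parse failure on the first dump pass left the walker uninitialized while the ->done() callback still ran xfrm_state_walk_done() on it. The fix marks the walk live only after xfrm_state_walk_init() and makes teardown conditional on that mark. In outline, with parse_failed standing in for the nlmsg_parse() error path:

	if (!cb->args[0]) {
		if (parse_failed)
			return err;	/* done() sees args[0] == 0 and skips walk_done() */
		xfrm_state_walk_init(walk, proto, filter);
		cb->args[0] = 1;	/* from here on, done() must clean up */
	}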
@@ -2051,9 +2051,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (up->hard) {
                xfrm_policy_delete(xp, p->dir);
                xfrm_audit_policy_delete(xp, 1, true);
-       } else {
-               // reset the timers here?
-               WARN(1, "Don't know what to do with soft policy expire\n");
        }
        km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);
 
@@ -2117,7 +2114,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        err = verify_newpolicy_info(&ua->policy);
        if (err)
-               goto bad_policy;
+               goto free_state;
 
        /*   build an XP */
        xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
@@ -2149,8 +2146,6 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        return 0;
 
-bad_policy:
-       WARN(1, "BAD policy passed\n");
 free_state:
        kfree(x);
 nomem:
index eb582c6..12b7304 100644 (file)
@@ -25,6 +25,8 @@ hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
 hostprogs-y += test_current_task_under_cgroup
+hostprogs-y += trace_event
+hostprogs-y += sampleip
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -52,6 +54,8 @@ xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
 test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
                                       test_current_task_under_cgroup_user.o
+trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
+sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -67,6 +71,7 @@ always += tracex6_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
+always += tcbpf2_kern.o
 always += lathist_kern.o
 always += offwaketime_kern.o
 always += spintest_kern.o
@@ -78,6 +83,8 @@ always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
 always += test_current_task_under_cgroup_kern.o
+always += trace_event_kern.o
+always += sampleip_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -102,6 +109,8 @@ HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
+HOSTLOADLIBES_trace_event += -lelf
+HOSTLOADLIBES_sampleip += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
index 6f1672a..90f44bd 100644 (file)
@@ -47,6 +47,16 @@ static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
        (void *) BPF_FUNC_probe_write_user;
 static int (*bpf_current_task_under_cgroup)(void *map, int index) =
        (void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+       (void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+       (void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+       (void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+       (void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+       (void *) BPF_FUNC_get_prandom_u32;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
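The new declarations follow the convention used throughout this header: each BPF helper is declared as a function pointer whose "address" is its enum bpf_func_id value, so LLVM emits a BPF call instruction carrying that id as the immediate, and the kernel binds the real helper at program load time. An existing declaration from the same header shows the shape:

static unsigned long long (*bpf_ktime_get_ns)(void) =
	(void *) BPF_FUNC_ktime_get_ns;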
index 0cfda23..97913e1 100644 (file)
@@ -51,6 +51,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
        bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
        bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
        bool is_xdp = strncmp(event, "xdp", 3) == 0;
+       bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
        enum bpf_prog_type prog_type;
        char buf[256];
        int fd, efd, err, id;
@@ -69,6 +70,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                prog_type = BPF_PROG_TYPE_TRACEPOINT;
        } else if (is_xdp) {
                prog_type = BPF_PROG_TYPE_XDP;
+       } else if (is_perf_event) {
+               prog_type = BPF_PROG_TYPE_PERF_EVENT;
        } else {
                printf("Unknown event '%s'\n", event);
                return -1;
@@ -82,7 +85,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
        prog_fd[prog_cnt++] = fd;
 
-       if (is_xdp)
+       if (is_xdp || is_perf_event)
                return 0;
 
        if (is_socket) {
@@ -326,6 +329,7 @@ int load_bpf_file(char *path)
                            memcmp(shname_prog, "kretprobe/", 10) == 0 ||
                            memcmp(shname_prog, "tracepoint/", 11) == 0 ||
                            memcmp(shname_prog, "xdp", 3) == 0 ||
+                           memcmp(shname_prog, "perf_event", 10) == 0 ||
                            memcmp(shname_prog, "socket", 6) == 0)
                                load_and_attach(shname_prog, insns, data_prog->d_size);
                }
@@ -344,6 +348,7 @@ int load_bpf_file(char *path)
                    memcmp(shname, "kretprobe/", 10) == 0 ||
                    memcmp(shname, "tracepoint/", 11) == 0 ||
                    memcmp(shname, "xdp", 3) == 0 ||
+                   memcmp(shname, "perf_event", 10) == 0 ||
                    memcmp(shname, "socket", 6) == 0)
                        load_and_attach(shname, data->d_buf, data->d_size);
        }
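With this dispatch in place, a program only has to live in a "perf_event" ELF section to be loaded as BPF_PROG_TYPE_PERF_EVENT. Note that, like xdp programs, it is not auto-attached by load_and_attach(); the caller attaches the returned fd to a perf event with ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd), as the new samples below do. A minimal skeleton (on_sample is a hypothetical name):

SEC("perf_event")
int on_sample(struct bpf_perf_event_data *ctx)
{
	/* runs on every sampled event; ctx carries regs and sample_period */
	return 0;
}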
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
new file mode 100644 (file)
index 0000000..774a681
--- /dev/null
@@ -0,0 +1,38 @@
+/* Copyright 2016 Netflix, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
+#include "bpf_helpers.h"
+
+#define MAX_IPS                8192
+
+struct bpf_map_def SEC("maps") ip_map = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(u64),
+       .value_size = sizeof(u32),
+       .max_entries = MAX_IPS,
+};
+
+SEC("perf_event")
+int do_sample(struct bpf_perf_event_data *ctx)
+{
+       u64 ip;
+       u32 *value, init_val = 1;
+
+       ip = ctx->regs.ip;
+       value = bpf_map_lookup_elem(&ip_map, &ip);
+       if (value)
+               *value += 1;
+       else
+               /* map-full (E2BIG) errors are deliberately ignored in this example */
+               bpf_map_update_elem(&ip_map, &ip, &init_val, BPF_NOEXIST);
+
+       return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
new file mode 100644 (file)
index 0000000..260a6bd
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * sampleip: sample instruction pointer and frequency count in a BPF map.
+ *
+ * Copyright 2016 Netflix, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define DEFAULT_FREQ   99
+#define DEFAULT_SECS   5
+#define MAX_IPS                8192
+#define PAGE_OFFSET    0xffff880000000000
+
+static int nr_cpus;
+
+static void usage(void)
+{
+       printf("USAGE: sampleip [-F freq] [duration]\n");
+       printf("       -F freq    # sample frequency (Hertz), default 99\n");
+       printf("       duration   # sampling duration (seconds), default 5\n");
+}
+
+static int sampling_start(int *pmu_fd, int freq)
+{
+       int i;
+
+       struct perf_event_attr pe_sample_attr = {
+               .type = PERF_TYPE_SOFTWARE,
+               .freq = 1,
+               .sample_period = freq,
+               .config = PERF_COUNT_SW_CPU_CLOCK,
+               .inherit = 1,
+       };
+
+       for (i = 0; i < nr_cpus; i++) {
+               pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+                                           -1 /* group_fd */, 0 /* flags */);
+               if (pmu_fd[i] < 0) {
+                       fprintf(stderr, "ERROR: Initializing perf sampling\n");
+                       return 1;
+               }
+               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
+                            prog_fd[0]) == 0);
+               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+       }
+
+       return 0;
+}
+
+static void sampling_end(int *pmu_fd)
+{
+       int i;
+
+       for (i = 0; i < nr_cpus; i++)
+               close(pmu_fd[i]);
+}
+
+struct ipcount {
+       __u64 ip;
+       __u32 count;
+};
+
+/* used for sorting */
+struct ipcount counts[MAX_IPS];
+
+static int count_cmp(const void *p1, const void *p2)
+{
+       return ((struct ipcount *)p1)->count - ((struct ipcount *)p2)->count;
+}
+
+static void print_ip_map(int fd)
+{
+       struct ksym *sym;
+       __u64 key, next_key;
+       __u32 value;
+       int i, max;
+
+       printf("%-19s %-32s %s\n", "ADDR", "KSYM", "COUNT");
+
+       /* fetch IPs and counts */
+       key = 0, i = 0;
+       while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+               bpf_lookup_elem(fd, &next_key, &value);
+               counts[i].ip = next_key;
+               counts[i++].count = value;
+               key = next_key;
+       }
+       max = i;
+
+       /* sort and print */
+       qsort(counts, max, sizeof(struct ipcount), count_cmp);
+       for (i = 0; i < max; i++) {
+               if (counts[i].ip > PAGE_OFFSET) {
+                       sym = ksym_search(counts[i].ip);
+                       printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
+                              counts[i].count);
+               } else {
+                       printf("0x%-17llx %-32s %u\n", counts[i].ip, "(user)",
+                              counts[i].count);
+               }
+       }
+
+       if (max == MAX_IPS) {
+               printf("WARNING: IP hash was full (max %d entries); ", max);
+               printf("may have dropped samples\n");
+       }
+}
+
+static void int_exit(int sig)
+{
+       printf("\n");
+       print_ip_map(map_fd[0]);
+       exit(0);
+}
+
+int main(int argc, char **argv)
+{
+       char filename[256];
+       int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
+
+       /* process arguments */
+       while ((opt = getopt(argc, argv, "F:h")) != -1) {
+               switch (opt) {
+               case 'F':
+                       freq = atoi(optarg);
+                       break;
+               case 'h':
+               default:
+                       usage();
+                       return 0;
+               }
+       }
+       if (argc - optind == 1)
+               secs = atoi(argv[optind]);
+       if (freq == 0 || secs == 0) {
+               usage();
+               return 1;
+       }
+
+       /* initialize kernel symbol translation */
+       if (load_kallsyms()) {
+               fprintf(stderr, "ERROR: loading /proc/kallsyms\n");
+               return 2;
+       }
+
+       /* create perf FDs for each CPU */
+       nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       pmu_fd = malloc(nr_cpus * sizeof(int));
+       if (pmu_fd == NULL) {
+               fprintf(stderr, "ERROR: malloc of pmu_fd\n");
+               return 1;
+       }
+
+       /* load BPF program */
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       if (load_bpf_file(filename)) {
+               fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
+                       errno);
+               if (strcmp(bpf_log_buf, "") == 0)
+                       fprintf(stderr, "Try: ulimit -l unlimited\n");
+               else
+                       fprintf(stderr, "%s", bpf_log_buf);
+               return 1;
+       }
+       signal(SIGINT, int_exit);
+
+       /* do sampling */
+       printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
+              freq, secs);
+       if (sampling_start(pmu_fd, freq) != 0)
+               return 1;
+       sleep(secs);
+       sampling_end(pmu_fd);
+       free(pmu_fd);
+
+       /* output sample counts */
+       print_ip_map(map_fd[0]);
+
+       return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
new file mode 100644 (file)
index 0000000..7a15289
--- /dev/null
@@ -0,0 +1,191 @@
+/* Copyright (c) 2016 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+#define ERROR(ret) do {\
+               char fmt[] = "ERROR line:%d ret:%d\n";\
+               bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+       } while(0)
+
+struct geneve_opt {
+       __be16  opt_class;
+       u8      type;
+       u8      length:5;
+       u8      r3:1;
+       u8      r2:1;
+       u8      r1:1;
+       u8      opt_data[8]; /* hard-coded to 8 bytes */
+};
+
+struct vxlan_metadata {
+       u32     gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+       int ret;
+       struct bpf_tunnel_key key;
+
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+       key.tunnel_id = 2;
+       key.tunnel_tos = 0;
+       key.tunnel_ttl = 64;
+
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+       int ret;
+       struct bpf_tunnel_key key;
+       char fmt[] = "key %d remote ip 0x%x\n";
+
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+       return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+       int ret;
+       struct bpf_tunnel_key key;
+       struct vxlan_metadata md;
+
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+       key.tunnel_id = 2;
+       key.tunnel_tos = 0;
+       key.tunnel_ttl = 64;
+
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+       ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+       int ret;
+       struct bpf_tunnel_key key;
+       struct vxlan_metadata md;
+       char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       bpf_trace_printk(fmt, sizeof(fmt),
+                       key.tunnel_id, key.remote_ipv4, md.gbp);
+
+       return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+       int ret, ret2;
+       struct bpf_tunnel_key key;
+       struct geneve_opt gopt;
+
+       __builtin_memset(&key, 0x0, sizeof(key));
+       key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+       key.tunnel_id = 2;
+       key.tunnel_tos = 0;
+       key.tunnel_ttl = 64;
+
+       __builtin_memset(&gopt, 0x0, sizeof(gopt));
+       gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
+       gopt.type = 0x08;
+       gopt.r1 = 1;
+       gopt.r2 = 0;
+       gopt.r3 = 1;
+       gopt.length = 2; /* 4-byte multiple */
+       *(int *) &gopt.opt_data = 0xdeadbeef;
+
+       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+       int ret;
+       struct bpf_tunnel_key key;
+       struct geneve_opt gopt;
+       char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+       if (ret < 0) {
+               ERROR(ret);
+               return TC_ACT_SHOT;
+       }
+
+       bpf_trace_printk(fmt, sizeof(fmt),
+                       key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
new file mode 100755 (executable)
index 0000000..4956589
--- /dev/null
@@ -0,0 +1,127 @@
+#!/bin/bash
+# In Namespace 0 (at_ns0) using native tunnel
+# Overlay IP: 10.1.1.100
+# local 172.16.1.100 remote 172.16.1.200
+# veth0 IP: 172.16.1.100, tunnel dev <type>00
+
+# Out of Namespace using BPF set/get on lwtunnel
+# Overlay IP: 10.1.1.200
+# local 172.16.1.200 remote 172.16.1.100
+# veth1 IP: 172.16.1.200, tunnel dev <type>11
+
+set -e
+
+function config_device {
+       ip netns add at_ns0
+       ip link add veth0 type veth peer name veth1
+       ip link set veth0 netns at_ns0
+       ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip link set dev veth1 up
+       ip addr add dev veth1 172.16.1.200/24
+}
+
+function add_gre_tunnel {
+       # in namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # out of namespace
+       ip link add dev $DEV type $TYPE key 2 external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_vxlan_tunnel {
+       # Set static ARP entries here: iptables set-mark operates on L3
+       # packets only, so ARP packets are never marked, which would cause
+       # errors at get_tunnel_{key/opt}.
+
+       # in namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+       ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+       ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+       # out of namespace
+       ip link add dev $DEV type $TYPE external gbp dstport 4789
+       ip link set dev $DEV address 52:54:00:d9:02:00 up
+       ip addr add dev $DEV 10.1.1.200/24
+       arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+function add_geneve_tunnel {
+       # in namespace
+       ip netns exec at_ns0 \
+               ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200
+       ip netns exec at_ns0 ip link set dev $DEV_NS up
+       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+       # out of namespace
+       ip link add dev $DEV type $TYPE dstport 6081 external
+       ip link set dev $DEV up
+       ip addr add dev $DEV 10.1.1.200/24
+}
+
+function attach_bpf {
+       DEV=$1
+       SET_TUNNEL=$2
+       GET_TUNNEL=$3
+       tc qdisc add dev $DEV clsact
+       tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
+       tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
+}
+
+function test_gre {
+       TYPE=gretap
+       DEV_NS=gretap00
+       DEV=gretap11
+       config_device
+       add_gre_tunnel
+       attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+       ping -c 1 10.1.1.100
+       ip netns exec at_ns0 ping -c 1 10.1.1.200
+}
+
+function test_vxlan {
+       TYPE=vxlan
+       DEV_NS=vxlan00
+       DEV=vxlan11
+       config_device
+       add_vxlan_tunnel
+       attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+       ping -c 1 10.1.1.100
+       ip netns exec at_ns0 ping -c 1 10.1.1.200
+}
+
+function test_geneve {
+       TYPE=geneve
+       DEV_NS=geneve00
+       DEV=geneve11
+       config_device
+       add_geneve_tunnel
+       attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+       ping -c 1 10.1.1.100
+       ip netns exec at_ns0 ping -c 1 10.1.1.200
+}
+
+function cleanup {
+       ip netns delete at_ns0
+       ip link del veth1
+       ip link del $DEV
+}
+
+echo "Testing GRE tunnel..."
+test_gre
+cleanup
+echo "Testing VXLAN tunnel..."
+test_vxlan
+cleanup
+echo "Testing GENEVE tunnel..."
+test_geneve
+cleanup
+echo "Success"
index 78c6f13..1f6cc9b 100644 (file)
@@ -1528,6 +1528,108 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "direct packet access: test5 (pkt_end >= reg, good access)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test6 (pkt_end >= reg, bad access)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .errstr = "invalid access to packet",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test7 (pkt_end >= reg, both accesses)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .errstr = "invalid access to packet",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test8 (double test, variant 1)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test9 (double test, variant 2)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+                       BPF_MOV64_IMM(BPF_REG_0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
        {
                "helper access to packet: test1, valid packet_ptr range",
                .insns = {
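The five tests added above all exercise the same C-level pattern, checking that the verifier tracks packet bounds through the pkt_end >= reg comparison form (tests 5-7) and through double comparisons in both directions (tests 8-9). What the ACCEPT cases prove safe corresponds roughly to this C, sketched under the usual direct-packet-access conventions:

	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	if (data_end >= data + 8)
		return *(unsigned char *)data;	/* provably in bounds */
	return 0;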
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
new file mode 100644 (file)
index 0000000..71a8ed3
--- /dev/null
@@ -0,0 +1,65 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
+#include <uapi/linux/perf_event.h>
+#include "bpf_helpers.h"
+
+struct key_t {
+       char comm[TASK_COMM_LEN];
+       u32 kernstack;
+       u32 userstack;
+};
+
+struct bpf_map_def SEC("maps") counts = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(struct key_t),
+       .value_size = sizeof(u64),
+       .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+       .type = BPF_MAP_TYPE_STACK_TRACE,
+       .key_size = sizeof(u32),
+       .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
+       .max_entries = 10000,
+};
+
+#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
+#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
+
+SEC("perf_event")
+int bpf_prog1(struct bpf_perf_event_data *ctx)
+{
+       char fmt[] = "CPU-%d period %lld ip %llx";
+       u32 cpu = bpf_get_smp_processor_id();
+       struct key_t key;
+       u64 *val, one = 1;
+
+       if (ctx->sample_period < 10000)
+               /* ignore warmup */
+               return 0;
+       bpf_get_current_comm(&key.comm, sizeof(key.comm));
+       key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS);
+       key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS);
+       if ((int)key.kernstack < 0 && (int)key.userstack < 0) {
+               bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period,
+                                ctx->regs.ip);
+               return 0;
+       }
+
+       val = bpf_map_lookup_elem(&counts, &key);
+       if (val)
+               (*val)++;
+       else
+               bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
new file mode 100644 (file)
index 0000000..9a130d3
--- /dev/null
@@ -0,0 +1,212 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <signal.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_FREQ 50
+
+static bool sys_read_seen, sys_write_seen;
+
+static void print_ksym(__u64 addr)
+{
+       struct ksym *sym;
+
+       if (!addr)
+               return;
+       sym = ksym_search(addr);
+       printf("%s;", sym->name);
+       if (!strcmp(sym->name, "sys_read"))
+               sys_read_seen = true;
+       else if (!strcmp(sym->name, "sys_write"))
+               sys_write_seen = true;
+}
+
+static void print_addr(__u64 addr)
+{
+       if (!addr)
+               return;
+       printf("%llx;", addr);
+}
+
+#define TASK_COMM_LEN 16
+
+struct key_t {
+       char comm[TASK_COMM_LEN];
+       __u32 kernstack;
+       __u32 userstack;
+};
+
+static void print_stack(struct key_t *key, __u64 count)
+{
+       __u64 ip[PERF_MAX_STACK_DEPTH] = {};
+       static bool warned;
+       int i;
+
+       printf("%3lld %s;", count, key->comm);
+       if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) {
+               printf("---;");
+       } else {
+               for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
+                       print_ksym(ip[i]);
+       }
+       printf("-;");
+       if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) {
+               printf("---;");
+       } else {
+               for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
+                       print_addr(ip[i]);
+       }
+       printf("\n");
+
+       if (key->kernstack == -EEXIST && !warned) {
+               printf("stackmap collisions seen. Consider increasing size\n");
+               warned = true;
+       } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) {
+               printf("err stackid %d %d\n", key->kernstack, key->userstack);
+       }
+}
+
+static void int_exit(int sig)
+{
+       kill(0, SIGKILL);
+       exit(0);
+}
+
+static void print_stacks(void)
+{
+       struct key_t key = {}, next_key;
+       __u64 value;
+       __u32 stackid = 0, next_id;
+       int fd = map_fd[0], stack_map = map_fd[1];
+
+       sys_read_seen = sys_write_seen = false;
+       while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+               bpf_lookup_elem(fd, &next_key, &value);
+               print_stack(&next_key, value);
+               bpf_delete_elem(fd, &next_key);
+               key = next_key;
+       }
+
+       if (!sys_read_seen || !sys_write_seen) {
+               printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
+               int_exit(0);
+       }
+
+       /* clear stack map */
+       while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) {
+               bpf_delete_elem(stack_map, &next_id);
+               stackid = next_id;
+       }
+}
+
+static void test_perf_event_all_cpu(struct perf_event_attr *attr)
+{
+       int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       int *pmu_fd = malloc(nr_cpus * sizeof(int));
+       int i;
+
+       /* open perf_event on all cpus */
+       for (i = 0; i < nr_cpus; i++) {
+               pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0);
+               if (pmu_fd[i] < 0) {
+                       printf("perf_event_open failed\n");
+                       goto all_cpu_err;
+               }
+               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
+               assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+       }
+       system("dd if=/dev/zero of=/dev/null count=5000k");
+       print_stacks();
+all_cpu_err:
+       for (i--; i >= 0; i--)
+               close(pmu_fd[i]);
+       free(pmu_fd);
+}
+
+static void test_perf_event_task(struct perf_event_attr *attr)
+{
+       int pmu_fd;
+
+       /* open task bound event */
+       pmu_fd = perf_event_open(attr, 0, -1, -1, 0);
+       if (pmu_fd < 0) {
+               printf("perf_event_open failed\n");
+               return;
+       }
+       assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
+       assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
+       system("dd if=/dev/zero of=/dev/null count=5000k");
+       print_stacks();
+       close(pmu_fd);
+}
+
+static void test_bpf_perf_event(void)
+{
+       struct perf_event_attr attr_type_hw = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_HARDWARE,
+               .config = PERF_COUNT_HW_CPU_CYCLES,
+               .inherit = 1,
+       };
+       struct perf_event_attr attr_type_sw = {
+               .sample_freq = SAMPLE_FREQ,
+               .freq = 1,
+               .type = PERF_TYPE_SOFTWARE,
+               .config = PERF_COUNT_SW_CPU_CLOCK,
+               .inherit = 1,
+       };
+
+       test_perf_event_all_cpu(&attr_type_hw);
+       test_perf_event_task(&attr_type_hw);
+       test_perf_event_all_cpu(&attr_type_sw);
+       test_perf_event_task(&attr_type_sw);
+}
+
+int main(int argc, char **argv)
+{
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       char filename[256];
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       setrlimit(RLIMIT_MEMLOCK, &r);
+
+       signal(SIGINT, int_exit);
+
+       if (load_kallsyms()) {
+               printf("failed to process /proc/kallsyms\n");
+               return 1;
+       }
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 2;
+       }
+
+       if (fork() == 0) {
+               read_trace_pipe();
+               return 0;
+       }
+       test_bpf_perf_event();
+
+       int_exit(0);
+       return 0;
+}
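
The user-space half above pairs with a kernel-side BPF object (loaded via the "%s_kern.o" name built in main()) that this excerpt does not show. As a rough sketch of that counterpart, modeled on samples/bpf conventions: map_fd[0] would be the hash from struct key_t to a counter and map_fd[1] the stack map that print_stack() resolves. Map names, flags, and headers below are assumptions, not code from this commit.

#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include "bpf_helpers.h"

/* struct key_t mirrors the definition in the user-space half above */

struct bpf_map_def SEC("maps") counts = {
        .type        = BPF_MAP_TYPE_HASH,
        .key_size    = sizeof(struct key_t),
        .value_size  = sizeof(__u64),
        .max_entries = 10000,
};

struct bpf_map_def SEC("maps") stackmap = {
        .type        = BPF_MAP_TYPE_STACK_TRACE,
        .key_size    = sizeof(__u32),
        .value_size  = PERF_MAX_STACK_DEPTH * sizeof(__u64),
        .max_entries = 10000,
};

SEC("perf_event")
int bpf_prog1(struct bpf_perf_event_data *ctx)
{
        struct key_t key = {};
        __u64 *val, one = 1;

        bpf_get_current_comm(&key.comm, sizeof(key.comm));
        /* on failure bpf_get_stackid() stores a negative errno, e.g. -EEXIST
         * on a stackmap collision, which print_stack() reports above */
        key.kernstack = bpf_get_stackid(ctx, &stackmap, 0);
        key.userstack = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);

        val = bpf_map_lookup_elem(&counts, &key);
        if (val)
                (*val)++;
        else
                bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST);
        return 0;
}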
index 4de3cc4..206a6b3 100755 (executable)
@@ -3570,15 +3570,6 @@ sub process {
                        }
                }
 
-# check for uses of DEFINE_PCI_DEVICE_TABLE
-               if ($line =~ /\bDEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=/) {
-                       if (WARN("DEFINE_PCI_DEVICE_TABLE",
-                                "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
-                           $fix) {
-                               $fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
-                       }
-               }
-
 # check for new typedefs, only function parameters and sparse annotations
 # make sense.
                if ($line =~ /\btypedef\s/ &&
index 49a00d5..aed4511 100755 (executable)
@@ -2136,9 +2136,11 @@ sub vcs_file_exists {
 
     my $cmd = $VCS_cmds{"file_exists_cmd"};
     $cmd =~ s/(\$\w+)/$1/eeg;          # interpolate $cmd
-
+    $cmd .= " 2>&1";
     $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
 
+    return 0 if ($? != 0);
+
     return $exists;
 }
 
index e1c09e2..8ea9fd2 100755 (executable)
@@ -332,7 +332,9 @@ if grep -q '^CONFIG_STACK_VALIDATION=y' $KCONFIG_CONFIG ; then
        (cd $objtree; find tools/objtool -type f -executable) >> "$objtree/debian/hdrobjfiles"
 fi
 (cd $objtree; find arch/$SRCARCH/include Module.symvers include scripts -type f) >> "$objtree/debian/hdrobjfiles"
-(cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles"
+if grep -q '^CONFIG_GCC_PLUGINS=y' $KCONFIG_CONFIG ; then
+       (cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles"
+fi
 destdir=$kernel_headers_dir/usr/src/linux-headers-$version
 mkdir -p "$destdir"
 (cd $srctree; tar -c -f - -T -) < "$objtree/debian/hdrsrcfiles" | (cd $destdir; tar -xf -)
index ed7eef2..b3775a9 100755 (executable)
@@ -206,7 +206,6 @@ regex_c=(
        '/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*, *\([[:alnum:]_]*\)/\1/v/'
        '/\<DECLARE_WAIT_QUEUE_HEAD(\([[:alnum:]_]*\)/\1/v/'
        '/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)(\([[:alnum:]_]*\)/\2/v/'
-       '/\<DEFINE_PCI_DEVICE_TABLE(\([[:alnum:]_]*\)/\1/v/'
        '/\(^\s\)OFFSET(\([[:alnum:]_]*\)/\2/v/'
        '/\(^\s\)DEFINE(\([[:alnum:]_]*\)/\2/v/'
        '/\<DEFINE_HASHTABLE(\([[:alnum:]_]*\)/\1/v/'
index df28f2b..118f454 100644 (file)
@@ -136,6 +136,7 @@ config HAVE_ARCH_HARDENED_USERCOPY
 config HARDENED_USERCOPY
        bool "Harden memory copies between kernel and userspace"
        depends on HAVE_ARCH_HARDENED_USERCOPY
+       depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
        select BUG
        help
          This option checks for obviously wrong memory regions when
@@ -146,6 +147,17 @@ config HARDENED_USERCOPY
          or are part of the kernel text. This kills entire classes
          of heap overflow exploits and similar kernel memory exposures.
 
+config HARDENED_USERCOPY_PAGESPAN
+       bool "Refuse to copy allocations that span multiple pages"
+       depends on HARDENED_USERCOPY
+       depends on EXPERT
+       help
+         When a multi-page allocation is done without __GFP_COMP,
+         hardened usercopy will reject attempts to copy it. There are,
+         however, several cases of this in the kernel that have not all
+         been removed. This config is intended to be used only while
+         trying to find such users.
+
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
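
To make the PAGESPAN help text concrete: the copies this option refuses look roughly like the contrived sketch below (illustrative only, not code from this commit). A buffer spanning two pages allocated without __GFP_COMP gives the checker no way to prove both pages belong to one allocation.

/* Contrived example of a copy HARDENED_USERCOPY_PAGESPAN would reject. */
static long demo_pagespan_copy(void __user *ubuf)
{
        /* order-1 allocation: two pages, no __GFP_COMP */
        void *buf = (void *)__get_free_pages(GFP_KERNEL, 1);
        long ret = 0;

        if (!buf)
                return -ENOMEM;
        /* the copy starts on page 0 and ends on page 1: flagged,
         * even though the allocation itself is perfectly valid */
        if (copy_to_user(ubuf, buf + PAGE_SIZE - 16, 64))
                ret = -EFAULT;
        free_pages((unsigned long)buf, 1);
        return ret;
}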
index 795437b..b450a27 100644 (file)
@@ -1633,11 +1633,13 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
                return -EBUSY;
        }
        list_add_tail(&rmidi->list, &snd_rawmidi_devices);
+       mutex_unlock(&register_mutex);
        err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI,
                                  rmidi->card, rmidi->device,
                                  &snd_rawmidi_f_ops, rmidi, &rmidi->dev);
        if (err < 0) {
                rmidi_err(rmidi, "unable to register\n");
+               mutex_lock(&register_mutex);
                list_del(&rmidi->list);
                mutex_unlock(&register_mutex);
                return err;
@@ -1645,6 +1647,7 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
        if (rmidi->ops && rmidi->ops->dev_register &&
            (err = rmidi->ops->dev_register(rmidi)) < 0) {
                snd_unregister_device(&rmidi->dev);
+               mutex_lock(&register_mutex);
                list_del(&rmidi->list);
                mutex_unlock(&register_mutex);
                return err;
@@ -1677,7 +1680,6 @@ static int snd_rawmidi_dev_register(struct snd_device *device)
                }
        }
 #endif /* CONFIG_SND_OSSEMUL */
-       mutex_unlock(&register_mutex);
        sprintf(name, "midi%d", rmidi->device);
        entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root);
        if (entry) {
index 9a6157e..fc144f4 100644 (file)
@@ -35,6 +35,9 @@
 #include <sound/initval.h>
 #include <linux/kmod.h>
 
+/* internal flags */
+#define SNDRV_TIMER_IFLG_PAUSED                0x00010000
+
 #if IS_ENABLED(CONFIG_SND_HRTIMER)
 #define DEFAULT_TIMER_LIMIT 4
 #else
@@ -294,8 +297,21 @@ int snd_timer_open(struct snd_timer_instance **ti,
                get_device(&timer->card->card_dev);
        timeri->slave_class = tid->dev_sclass;
        timeri->slave_id = slave_id;
-       if (list_empty(&timer->open_list_head) && timer->hw.open)
-               timer->hw.open(timer);
+
+       if (list_empty(&timer->open_list_head) && timer->hw.open) {
+               int err = timer->hw.open(timer);
+               if (err) {
+                       kfree(timeri->owner);
+                       kfree(timeri);
+
+                       if (timer->card)
+                               put_device(&timer->card->card_dev);
+                       module_put(timer->module);
+                       mutex_unlock(&register_mutex);
+                       return err;
+               }
+       }
+
        list_add_tail(&timeri->open_list, &timer->open_list_head);
        snd_timer_check_master(timeri);
        mutex_unlock(&register_mutex);
@@ -526,6 +542,10 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop)
                }
        }
        timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START);
+       if (stop)
+               timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED;
+       else
+               timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
        snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
                          SNDRV_TIMER_EVENT_CONTINUE);
  unlock:
@@ -587,6 +607,10 @@ int snd_timer_stop(struct snd_timer_instance *timeri)
  */
 int snd_timer_continue(struct snd_timer_instance *timeri)
 {
+       /* timer can continue only after pause */
+       if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
+               return -EINVAL;
+
        if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE)
                return snd_timer_start_slave(timeri, false);
        else
@@ -813,6 +837,7 @@ int snd_timer_new(struct snd_card *card, char *id, struct snd_timer_id *tid,
        timer->tmr_subdevice = tid->subdevice;
        if (id)
                strlcpy(timer->id, id, sizeof(timer->id));
+       timer->sticks = 1;
        INIT_LIST_HEAD(&timer->device_list);
        INIT_LIST_HEAD(&timer->open_list_head);
        INIT_LIST_HEAD(&timer->active_list_head);
@@ -1817,6 +1842,9 @@ static int snd_timer_user_continue(struct file *file)
        tu = file->private_data;
        if (!tu->timeri)
                return -EBADFD;
+       /* start the timer instead of continuing if it was not paused before */
+       if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
+               return snd_timer_user_start(file);
        tu->timeri->lost = 0;
        return (err = snd_timer_continue(tu->timeri)) < 0 ? err : 0;
 }
@@ -1958,6 +1986,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
                tu->qused--;
                spin_unlock_irq(&tu->qlock);
 
+               mutex_lock(&tu->ioctl_lock);
                if (tu->tread) {
                        if (copy_to_user(buffer, &tu->tqueue[qhead],
                                         sizeof(struct snd_timer_tread)))
@@ -1967,6 +1996,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer,
                                         sizeof(struct snd_timer_read)))
                                err = -EFAULT;
                }
+               mutex_unlock(&tu->ioctl_lock);
 
                spin_lock_irq(&tu->qlock);
                if (err < 0)
index 03ed352..d73c12b 100644 (file)
@@ -108,7 +108,6 @@ struct snd_efw {
        u8 *resp_buf;
        u8 *pull_ptr;
        u8 *push_ptr;
-       unsigned int resp_queues;
 };
 
 int snd_efw_transaction_cmd(struct fw_unit *unit,
index 33df865..2e1d9a2 100644 (file)
@@ -25,6 +25,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
 {
        unsigned int length, till_end, type;
        struct snd_efw_transaction *t;
+       u8 *pull_ptr;
        long count = 0;
 
        if (remained < sizeof(type) + sizeof(struct snd_efw_transaction))
@@ -38,8 +39,17 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
        buf += sizeof(type);
 
        /* write into buffer as many responses as possible */
-       while (efw->resp_queues > 0) {
-               t = (struct snd_efw_transaction *)(efw->pull_ptr);
+       spin_lock_irq(&efw->lock);
+
+       /*
+        * If another task reaches here while this task is copying to user
+        * space, it picks up the current position in the buffer and can read
+        * the same series of responses.
+        */
+       pull_ptr = efw->pull_ptr;
+
+       while (efw->push_ptr != pull_ptr) {
+               t = (struct snd_efw_transaction *)(pull_ptr);
                length = be32_to_cpu(t->length) * sizeof(__be32);
 
                /* confirm enough space for this response */
@@ -49,26 +59,39 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained,
                /* copy from ring buffer to user buffer */
                while (length > 0) {
                        till_end = snd_efw_resp_buf_size -
-                               (unsigned int)(efw->pull_ptr - efw->resp_buf);
+                               (unsigned int)(pull_ptr - efw->resp_buf);
                        till_end = min_t(unsigned int, length, till_end);
 
-                       if (copy_to_user(buf, efw->pull_ptr, till_end))
+                       spin_unlock_irq(&efw->lock);
+
+                       if (copy_to_user(buf, pull_ptr, till_end))
                                return -EFAULT;
 
-                       efw->pull_ptr += till_end;
-                       if (efw->pull_ptr >= efw->resp_buf +
-                                            snd_efw_resp_buf_size)
-                               efw->pull_ptr -= snd_efw_resp_buf_size;
+                       spin_lock_irq(&efw->lock);
+
+                       pull_ptr += till_end;
+                       if (pull_ptr >= efw->resp_buf + snd_efw_resp_buf_size)
+                               pull_ptr -= snd_efw_resp_buf_size;
 
                        length -= till_end;
                        buf += till_end;
                        count += till_end;
                        remained -= till_end;
                }
-
-               efw->resp_queues--;
        }
 
+       /*
+        * All tasks can read from the buffer nearly simultaneously, but each
+        * task's final position differs with the length of the buffer it was
+        * given. Here, for simplicity, the buffer position is published by
+        * whichever task finishes last. A listening application should allow
+        * only one thread to read from the buffer; otherwise each task can see
+        * a different sequence of responses depending on its buffer length.
+        */
+       efw->pull_ptr = pull_ptr;
+
+       spin_unlock_irq(&efw->lock);
+
        return count;
 }
 
@@ -76,14 +99,17 @@ static long
 hwdep_read_locked(struct snd_efw *efw, char __user *buf, long count,
                  loff_t *offset)
 {
-       union snd_firewire_event event;
+       union snd_firewire_event event = {
+               .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
+       };
 
-       memset(&event, 0, sizeof(event));
+       spin_lock_irq(&efw->lock);
 
-       event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
        event.lock_status.status = (efw->dev_lock_count > 0);
        efw->dev_lock_changed = false;
 
+       spin_unlock_irq(&efw->lock);
+
        count = min_t(long, count, sizeof(event.lock_status));
 
        if (copy_to_user(buf, &event, count))
@@ -98,10 +124,15 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
 {
        struct snd_efw *efw = hwdep->private_data;
        DEFINE_WAIT(wait);
+       bool dev_lock_changed;
+       bool queued;
 
        spin_lock_irq(&efw->lock);
 
-       while ((!efw->dev_lock_changed) && (efw->resp_queues == 0)) {
+       dev_lock_changed = efw->dev_lock_changed;
+       queued = efw->push_ptr != efw->pull_ptr;
+
+       while (!dev_lock_changed && !queued) {
                prepare_to_wait(&efw->hwdep_wait, &wait, TASK_INTERRUPTIBLE);
                spin_unlock_irq(&efw->lock);
                schedule();
@@ -109,15 +140,17 @@ hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
                if (signal_pending(current))
                        return -ERESTARTSYS;
                spin_lock_irq(&efw->lock);
+               dev_lock_changed = efw->dev_lock_changed;
+               queued = efw->push_ptr != efw->pull_ptr;
        }
 
-       if (efw->dev_lock_changed)
+       spin_unlock_irq(&efw->lock);
+
+       if (dev_lock_changed)
                count = hwdep_read_locked(efw, buf, count, offset);
-       else if (efw->resp_queues > 0)
+       else if (queued)
                count = hwdep_read_resp_buf(efw, buf, count, offset);
 
-       spin_unlock_irq(&efw->lock);
-
        return count;
 }
 
@@ -160,7 +193,7 @@ hwdep_poll(struct snd_hwdep *hwdep, struct file *file, poll_table *wait)
        poll_wait(file, &efw->hwdep_wait, wait);
 
        spin_lock_irq(&efw->lock);
-       if (efw->dev_lock_changed || (efw->resp_queues > 0))
+       if (efw->dev_lock_changed || efw->pull_ptr != efw->push_ptr)
                events = POLLIN | POLLRDNORM;
        else
                events = 0;
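
The fireworks rework above replaces the resp_queues counter with push/pull pointer comparison and follows a standard shape: snapshot the position under the spinlock, drop the lock around copy_to_user() (which may fault and sleep), re-acquire, and publish the advanced position once at the end. Stripped of driver detail, the pattern is roughly this sketch, where have_data() and advance() are hypothetical helpers, not driver functions:

spin_lock_irq(&lock);
pos = shared->pull_ptr;                 /* private snapshot */
while (have_data(shared, pos)) {
        spin_unlock_irq(&lock);         /* never hold a spinlock across a fault */
        if (copy_to_user(ubuf, pos, chunk))
                return -EFAULT;         /* lock already dropped */
        spin_lock_irq(&lock);
        pos = advance(shared, pos, chunk);
        ubuf += chunk;
}
shared->pull_ptr = pos;                 /* publish once, at the end */
spin_unlock_irq(&lock);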
index 0639dcb..beb0a0f 100644 (file)
@@ -188,8 +188,8 @@ proc_read_queues_state(struct snd_info_entry *entry,
        else
                consumed = (unsigned int)(efw->push_ptr - efw->pull_ptr);
 
-       snd_iprintf(buffer, "%d %d/%d\n",
-                   efw->resp_queues, consumed, snd_efw_resp_buf_size);
+       snd_iprintf(buffer, "%d/%d\n",
+                   consumed, snd_efw_resp_buf_size);
 }
 
 static void
index f550808..36a08ba 100644 (file)
@@ -121,11 +121,11 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode)
        size_t capacity, till_end;
        struct snd_efw_transaction *t;
 
-       spin_lock_irq(&efw->lock);
-
        t = (struct snd_efw_transaction *)data;
        length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length);
 
+       spin_lock_irq(&efw->lock);
+
        if (efw->push_ptr < efw->pull_ptr)
                capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr);
        else
@@ -155,7 +155,6 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode)
        }
 
        /* for hwdep */
-       efw->resp_queues++;
        wake_up(&efw->hwdep_wait);
 
        *rcode = RCODE_COMPLETE;
index 131267c..106406c 100644 (file)
 
 #include "tascam.h"
 
-static long hwdep_read_locked(struct snd_tscm *tscm, char __user *buf,
-                             long count)
-{
-       union snd_firewire_event event;
-
-       memset(&event, 0, sizeof(event));
-
-       event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
-       event.lock_status.status = (tscm->dev_lock_count > 0);
-       tscm->dev_lock_changed = false;
-
-       count = min_t(long, count, sizeof(event.lock_status));
-
-       if (copy_to_user(buf, &event, count))
-               return -EFAULT;
-
-       return count;
-}
-
 static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
                       loff_t *offset)
 {
        struct snd_tscm *tscm = hwdep->private_data;
        DEFINE_WAIT(wait);
-       union snd_firewire_event event;
+       union snd_firewire_event event = {
+               .lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
+       };
 
        spin_lock_irq(&tscm->lock);
 
@@ -54,10 +37,16 @@ static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
                spin_lock_irq(&tscm->lock);
        }
 
-       memset(&event, 0, sizeof(event));
-       count = hwdep_read_locked(tscm, buf, count);
+       event.lock_status.status = (tscm->dev_lock_count > 0);
+       tscm->dev_lock_changed = false;
+
        spin_unlock_irq(&tscm->lock);
 
+       count = min_t(long, count, sizeof(event.lock_status));
+
+       if (copy_to_user(buf, &event, count))
+               return -EFAULT;
+
        return count;
 }
 
index 574b1b4..575cefd 100644 (file)
@@ -4828,7 +4828,7 @@ enum {
        ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
        ALC292_FIXUP_TPT440_DOCK,
        ALC292_FIXUP_TPT440,
-       ALC283_FIXUP_BXBT2807_MIC,
+       ALC283_FIXUP_HEADSET_MIC,
        ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
        ALC282_FIXUP_ASPIRE_V5_PINS,
        ALC280_FIXUP_HP_GPIO4,
@@ -4855,6 +4855,7 @@ enum {
        ALC221_FIXUP_HP_FRONT_MIC,
        ALC292_FIXUP_TPT460,
        ALC298_FIXUP_SPK_VOLUME,
+       ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5321,7 +5322,7 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC292_FIXUP_TPT440_DOCK,
        },
-       [ALC283_FIXUP_BXBT2807_MIC] = {
+       [ALC283_FIXUP_HEADSET_MIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
                        { 0x19, 0x04a110f0 },
@@ -5516,6 +5517,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
        },
+       [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x90170151 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5560,6 +5570,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
        SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+       SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
        SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
        SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
        SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
@@ -5651,7 +5662,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
-       SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
+       SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -5894,6 +5906,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60170},
                {0x14, 0x90170120},
                {0x21, 0x02211030}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell Inspiron 5468", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60180},
+               {0x14, 0x90170120},
+               {0x21, 0x02211030}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC256_STANDARD_PINS),
        SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
index 54c09ac..16e459a 100644 (file)
@@ -299,8 +299,9 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream,
        clk_enable(ssc_p->ssc->clk);
        ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk);
 
-       /* Reset the SSC to keep it at a clean status */
-       ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
+       /* Reset the SSC, unless already initialized, to keep it in a clean state */
+       if (!ssc_p->initialized)
+               ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
 
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
                dir = 0;
index e5527bc..bcf1834 100644 (file)
@@ -1247,8 +1247,8 @@ static int da7213_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
                return -EINVAL;
        }
 
-       /* By default only 32 BCLK per WCLK is supported */
-       dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_32;
+       /* By default only 64 BCLK per WCLK is supported */
+       dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_64;
 
        snd_soc_write(codec, DA7213_DAI_CLK_MODE, dai_clk_mode);
        snd_soc_update_bits(codec, DA7213_DAI_CTRL, DA7213_DAI_FORMAT_MASK,
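
The default matters because the bit clock scales directly with the frame clock, and 32 BCLKs per WCLK only leaves room for two 16-bit slots per frame, while 64 accommodates two 32-bit slots. A quick check of the arithmetic, with an illustrative 48 kHz frame rate (example values, not from the driver):

/* 64 BCLK per WCLK at WCLK = 48 kHz */
unsigned int bclk_hz = 64 * 48000;      /* 3,072,000 Hz */
/* 32 BCLK/WCLK => at most 2 x 16-bit slots per frame;
 * 64 BCLK/WCLK => room for 2 x 32-bit slots            */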
index cf0a39b..02352ed 100644 (file)
@@ -412,6 +412,7 @@ static int max98371_i2c_remove(struct i2c_client *client)
 
 static const struct i2c_device_id max98371_i2c_id[] = {
        { "max98371", 0 },
+       { }
 };
 
 MODULE_DEVICE_TABLE(i2c, max98371_i2c_id);
index 5c9707a..2e59a85 100644 (file)
@@ -212,31 +212,6 @@ static const unsigned short logtable[256] = {
        0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47
 };
 
-static struct snd_soc_dai *nau8825_get_codec_dai(struct nau8825 *nau8825)
-{
-       struct snd_soc_codec *codec = snd_soc_dapm_to_codec(nau8825->dapm);
-       struct snd_soc_component *component = &codec->component;
-       struct snd_soc_dai *codec_dai, *_dai;
-
-       list_for_each_entry_safe(codec_dai, _dai, &component->dai_list, list) {
-               if (!strncmp(codec_dai->name, NUVOTON_CODEC_DAI,
-                       strlen(NUVOTON_CODEC_DAI)))
-                       return codec_dai;
-       }
-       return NULL;
-}
-
-static bool nau8825_dai_is_active(struct nau8825 *nau8825)
-{
-       struct snd_soc_dai *codec_dai = nau8825_get_codec_dai(nau8825);
-
-       if (codec_dai) {
-               if (codec_dai->playback_active || codec_dai->capture_active)
-                       return true;
-       }
-       return false;
-}
-
 /**
  * nau8825_sema_acquire - acquire the semaphore of nau88l25
  * @nau8825:  component to register the codec private data with
@@ -250,19 +225,26 @@ static bool nau8825_dai_is_active(struct nau8825 *nau8825)
  * Acquires the semaphore without jiffies. If no more tasks are allowed
  * to acquire the semaphore, calling this function will put the task to
  * sleep until the semaphore is released.
- * It returns if the semaphore was acquired.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME.
+ * If the sleep is interrupted by a signal, this function will return -EINTR.
+ * It returns 0 if the semaphore was acquired successfully.
  */
-static void nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
+static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
 {
        int ret;
 
-       if (timeout)
+       if (timeout) {
                ret = down_timeout(&nau8825->xtalk_sem, timeout);
-       else
+               if (ret < 0)
+                       dev_warn(nau8825->dev, "Acquire semaphore timeout\n");
+       } else {
                ret = down_interruptible(&nau8825->xtalk_sem);
+               if (ret < 0)
+                       dev_warn(nau8825->dev, "Acquire semaphore fail\n");
+       }
 
-       if (ret < 0)
-               dev_warn(nau8825->dev, "Acquire semaphone fail\n");
+       return ret;
 }
 
 /**
@@ -1205,6 +1187,8 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream,
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
        unsigned int val_len = 0;
 
+       nau8825_sema_acquire(nau8825, 2 * HZ);
+
        switch (params_width(params)) {
        case 16:
                val_len |= NAU8825_I2S_DL_16;
@@ -1225,6 +1209,9 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream,
        regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1,
                NAU8825_I2S_DL_MASK, val_len);
 
+       /* Release the semaphore. */
+       nau8825_sema_release(nau8825);
+
        return 0;
 }
 
@@ -1234,6 +1221,8 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
        unsigned int ctrl1_val = 0, ctrl2_val = 0;
 
+       nau8825_sema_acquire(nau8825, 2 * HZ);
+
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
        case SND_SOC_DAIFMT_CBM_CFM:
                ctrl2_val |= NAU8825_I2S_MS_MASTER;
@@ -1282,6 +1271,9 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
        regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
                NAU8825_I2S_MS_MASK, ctrl2_val);
 
+       /* Release the semaphore. */
+       nau8825_sema_release(nau8825);
+
        return 0;
 }
 
@@ -1611,8 +1603,11 @@ static irqreturn_t nau8825_interrupt(int irq, void *data)
                                         * cess and restore changes if process
                                         * is ongoing when ejection.
                                         */
+                                       int ret;
                                        nau8825->xtalk_protect = true;
-                                       nau8825_sema_acquire(nau8825, 0);
+                                       ret = nau8825_sema_acquire(nau8825, 0);
+                                       if (ret < 0)
+                                               nau8825->xtalk_protect = false;
                                }
                                /* Startup cross talk detection process */
                                nau8825->xtalk_state = NAU8825_XTALK_PREPARE;
@@ -2238,23 +2233,14 @@ static int __maybe_unused nau8825_suspend(struct snd_soc_codec *codec)
 static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec)
 {
        struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
+       int ret;
 
        regcache_cache_only(nau8825->regmap, false);
        regcache_sync(nau8825->regmap);
-       if (nau8825_is_jack_inserted(nau8825->regmap)) {
-               /* If the jack is inserted, we need to check whether the play-
-                * back is active before suspend. If active, the driver has to
-                * raise the protection for cross talk function to avoid the
-                * playback recovers before cross talk process finish. Other-
-                * wise, the playback will be interfered by cross talk func-
-                * tion. It is better to apply hardware related parameters
-                * before starting playback or record.
-                */
-               if (nau8825_dai_is_active(nau8825)) {
-                       nau8825->xtalk_protect = true;
-                       nau8825_sema_acquire(nau8825, 0);
-               }
-       }
+       nau8825->xtalk_protect = true;
+       ret = nau8825_sema_acquire(nau8825, 0);
+       if (ret < 0)
+               nau8825->xtalk_protect = false;
        enable_irq(nau8825->irq);
 
        return 0;
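
With nau8825_sema_acquire() now returning a status, the DAI callbacks bracket their register writes as in the sketch below (condensed from the hw_params hunk above). Note the driver deliberately proceeds even if the acquire times out, since the semaphore only serializes against the cross-talk measurement, and releases unconditionally afterwards:

nau8825_sema_acquire(nau8825, 2 * HZ);   /* warns and returns -ETIME on timeout */
regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1,
                   NAU8825_I2S_DL_MASK, val_len);
nau8825_sema_release(nau8825);           /* released unconditionally, as above */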
index a67ea10..f266439 100644 (file)
@@ -581,7 +581,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000,
        if (anc_transitions[i].dest == ANC_OFF)
                clk_disable_unprepare(wm2000->mclk);
 
-       return ret;
+       return 0;
 }
 
 static int wm2000_anc_set_mode(struct wm2000_priv *wm2000)
index 45602ca..2d53c8d 100644 (file)
@@ -1,5 +1,5 @@
-obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) := simple-card-utils.o
-
+snd-soc-simple-card-utils-objs := simple-card-utils.o
 snd-soc-simple-card-objs       := simple-card.o
 
-obj-$(CONFIG_SND_SIMPLE_CARD)  += snd-soc-simple-card.o
+obj-$(CONFIG_SND_SIMPLE_CARD_UTILS)    += snd-soc-simple-card-utils.o
+obj-$(CONFIG_SND_SIMPLE_CARD)          += snd-soc-simple-card.o
index d89a9a1..9599de6 100644 (file)
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/module.h>
 #include <linux/of.h>
 #include <sound/simple_card_utils.h>
 
@@ -95,3 +96,8 @@ int asoc_simple_card_parse_card_name(struct snd_soc_card *card,
        return 0;
 }
 EXPORT_SYMBOL_GPL(asoc_simple_card_parse_card_name);
+
+/* Module information */
+MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
+MODULE_DESCRIPTION("ALSA SoC Simple Card Utils");
+MODULE_LICENSE("GPL v2");
index 25fcb79..ddcb52a 100644 (file)
@@ -123,6 +123,11 @@ int snd_skl_get_module_info(struct skl_sst *ctx, u8 *uuid,
 
        uuid_mod = (uuid_le *)uuid;
 
+       if (list_empty(&ctx->uuid_list)) {
+               dev_err(ctx->dev, "Module list is empty\n");
+               return -EINVAL;
+       }
+
        list_for_each_entry(module, &ctx->uuid_list, list) {
                if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) {
                        dfw_config->module_id = module->id;
index cd59536..e3e7641 100644 (file)
@@ -672,8 +672,10 @@ static int skl_probe(struct pci_dev *pci,
 
        skl->nhlt = skl_nhlt_init(bus->dev);
 
-       if (skl->nhlt == NULL)
+       if (skl->nhlt == NULL) {
+               err = -ENODEV;
                goto out_free;
+       }
 
        skl_nhlt_update_topology_bin(skl);
 
index 0843a68..f61b3b5 100644 (file)
 struct abe_twl6040 {
        int     jack_detection; /* board can detect jack events */
        int     mclk_freq;      /* MCLK frequency speed for twl6040 */
-
-       struct platform_device *dmic_codec_dev;
 };
 
+struct platform_device *dmic_codec_dev;
+
 static int omap_abe_hw_params(struct snd_pcm_substream *substream,
        struct snd_pcm_hw_params *params)
 {
@@ -258,8 +258,6 @@ static int omap_abe_probe(struct platform_device *pdev)
        if (priv == NULL)
                return -ENOMEM;
 
-       priv->dmic_codec_dev = ERR_PTR(-EINVAL);
-
        if (snd_soc_of_parse_card_name(card, "ti,model")) {
                dev_err(&pdev->dev, "Card name is not provided\n");
                return -ENODEV;
@@ -284,13 +282,6 @@ static int omap_abe_probe(struct platform_device *pdev)
                num_links = 2;
                abe_twl6040_dai_links[1].cpu_of_node = dai_node;
                abe_twl6040_dai_links[1].platform_of_node = dai_node;
-
-               priv->dmic_codec_dev = platform_device_register_simple(
-                                               "dmic-codec", -1, NULL, 0);
-               if (IS_ERR(priv->dmic_codec_dev)) {
-                       dev_err(&pdev->dev, "Can't instantiate dmic-codec\n");
-                       return PTR_ERR(priv->dmic_codec_dev);
-               }
        } else {
                num_links = 1;
        }
@@ -299,16 +290,14 @@ static int omap_abe_probe(struct platform_device *pdev)
        of_property_read_u32(node, "ti,mclk-freq", &priv->mclk_freq);
        if (!priv->mclk_freq) {
                dev_err(&pdev->dev, "MCLK frequency not provided\n");
-               ret = -EINVAL;
-               goto err_unregister;
+               return -EINVAL;
        }
 
        card->fully_routed = 1;
 
        if (!priv->mclk_freq) {
                dev_err(&pdev->dev, "MCLK frequency missing\n");
-               ret = -ENODEV;
-               goto err_unregister;
+               return -ENODEV;
        }
 
        card->dai_link = abe_twl6040_dai_links;
@@ -317,17 +306,9 @@ static int omap_abe_probe(struct platform_device *pdev)
        snd_soc_card_set_drvdata(card, priv);
 
        ret = snd_soc_register_card(card);
-       if (ret) {
+       if (ret)
                dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
                        ret);
-               goto err_unregister;
-       }
-
-       return 0;
-
-err_unregister:
-       if (!IS_ERR(priv->dmic_codec_dev))
-               platform_device_unregister(priv->dmic_codec_dev);
 
        return ret;
 }
@@ -335,13 +316,9 @@ err_unregister:
 static int omap_abe_remove(struct platform_device *pdev)
 {
        struct snd_soc_card *card = platform_get_drvdata(pdev);
-       struct abe_twl6040 *priv = snd_soc_card_get_drvdata(card);
 
        snd_soc_unregister_card(card);
 
-       if (!IS_ERR(priv->dmic_codec_dev))
-               platform_device_unregister(priv->dmic_codec_dev);
-
        return 0;
 }
 
@@ -361,7 +338,33 @@ static struct platform_driver omap_abe_driver = {
        .remove = omap_abe_remove,
 };
 
-module_platform_driver(omap_abe_driver);
+static int __init omap_abe_init(void)
+{
+       int ret;
+
+       dmic_codec_dev = platform_device_register_simple("dmic-codec", -1, NULL,
+                                                        0);
+       if (IS_ERR(dmic_codec_dev)) {
+               pr_err("%s: dmic-codec device registration failed\n", __func__);
+               return PTR_ERR(dmic_codec_dev);
+       }
+
+       ret = platform_driver_register(&omap_abe_driver);
+       if (ret) {
+               pr_err("%s: platform driver registration failed\n", __func__);
+               platform_device_unregister(dmic_codec_dev);
+       }
+
+       return ret;
+}
+module_init(omap_abe_init);
+
+static void __exit omap_abe_exit(void)
+{
+       platform_driver_unregister(&omap_abe_driver);
+       platform_device_unregister(dmic_codec_dev);
+}
+module_exit(omap_abe_exit);
 
 MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>");
 MODULE_DESCRIPTION("ALSA SoC for OMAP boards with ABE and twl6040 codec");
index e7cdc51..64609c7 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/clk.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_device.h>
@@ -55,7 +54,6 @@ struct omap_mcpdm {
        unsigned long phys_base;
        void __iomem *io_base;
        int irq;
-       struct clk *pdmclk;
 
        struct mutex mutex;
 
@@ -390,15 +388,14 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai)
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
        int ret;
 
-       clk_prepare_enable(mcpdm->pdmclk);
        pm_runtime_enable(mcpdm->dev);
 
        /* Disable lines while request is ongoing */
        pm_runtime_get_sync(mcpdm->dev);
        omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00);
 
-       ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler,
-                               0, "McPDM", (void *)mcpdm);
+       ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM",
+                         (void *)mcpdm);
 
        pm_runtime_put_sync(mcpdm->dev);
 
@@ -423,9 +420,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai)
 {
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
+       free_irq(mcpdm->irq, (void *)mcpdm);
        pm_runtime_disable(mcpdm->dev);
 
-       clk_disable_unprepare(mcpdm->pdmclk);
        return 0;
 }
 
@@ -445,8 +442,6 @@ static int omap_mcpdm_suspend(struct snd_soc_dai *dai)
                mcpdm->pm_active_count++;
        }
 
-       clk_disable_unprepare(mcpdm->pdmclk);
-
        return 0;
 }
 
@@ -454,8 +449,6 @@ static int omap_mcpdm_resume(struct snd_soc_dai *dai)
 {
        struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
-       clk_prepare_enable(mcpdm->pdmclk);
-
        if (mcpdm->pm_active_count) {
                while (mcpdm->pm_active_count--)
                        pm_runtime_get_sync(mcpdm->dev);
@@ -549,15 +542,6 @@ static int asoc_mcpdm_probe(struct platform_device *pdev)
 
        mcpdm->dev = &pdev->dev;
 
-       mcpdm->pdmclk = devm_clk_get(&pdev->dev, "pdmclk");
-       if (IS_ERR(mcpdm->pdmclk)) {
-               if (PTR_ERR(mcpdm->pdmclk) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-               dev_warn(&pdev->dev, "Error getting pdmclk (%ld)!\n",
-                        PTR_ERR(mcpdm->pdmclk));
-               mcpdm->pdmclk = NULL;
-       }
-
        ret =  devm_snd_soc_register_component(&pdev->dev,
                                               &omap_mcpdm_component,
                                               &omap_mcpdm_dai, 1);
index 50849e1..92e88bc 100644 (file)
@@ -58,10 +58,12 @@ static struct platform_device *s3c24xx_uda134x_snd_device;
 
 static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
 {
-       int ret = 0;
+       struct snd_soc_pcm_runtime *rtd = substream->private_data;
+       struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 #ifdef ENFORCE_RATES
        struct snd_pcm_runtime *runtime = substream->runtime;
 #endif
+       int ret = 0;
 
        mutex_lock(&clk_lock);
        pr_debug("%s %d\n", __func__, clk_users);
@@ -71,8 +73,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
                        printk(KERN_ERR "%s cannot get xtal\n", __func__);
                        ret = PTR_ERR(xtal);
                } else {
-                       pclk = clk_get(&s3c24xx_uda134x_snd_device->dev,
-                                      "pclk");
+                       pclk = clk_get(cpu_dai->dev, "iis");
                        if (IS_ERR(pclk)) {
                                printk(KERN_ERR "%s cannot get pclk\n",
                                       __func__);
index e39f916..969a516 100644 (file)
@@ -226,8 +226,12 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io,
        ifscr = 0;
        fsrate = 0;
        if (fin != fout) {
+               u64 n;
+
                ifscr = 1;
-               fsrate = 0x0400000 / fout * fin;
+               n = (u64)0x0400000 * fin;
+               do_div(n, fout);
+               fsrate = n;
        }
 
        /*
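
The SRC fix is about evaluation order: `0x0400000 / fout * fin` divides first in 32 bits and throws away the fraction, while the replacement widens to 64 bits, multiplies, then divides. With illustrative rates (44.1 kHz in, 48 kHz out):

u32 fin = 44100, fout = 48000, fsrate;
u32 bad = 0x0400000 / fout * fin;   /* 4194304 / 48000 truncates to 87;
                                     * 87 * 44100 = 3,836,700 (~0.4% low) */
u64 n = (u64)0x0400000 * fin;       /* 184,968,806,400: needs 64 bits */
do_div(n, fout);                    /* n = 3,853,516: the intended ratio */
fsrate = n;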
index d2df46c..bf7b52f 100644 (file)
@@ -121,7 +121,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
 
                dpcm_be_disconnect(fe, stream);
                fe->dpcm[stream].runtime = NULL;
-               goto fe_err;
+               goto path_err;
        }
 
        dpcm_clear_pending_state(fe, stream);
@@ -136,6 +136,8 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream)
 
        return 0;
 
+path_err:
+       dpcm_path_put(&list);
 fe_err:
        if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
                fe->dai_link->compr_ops->shutdown(cstream);
index 16369ca..4afa8db 100644 (file)
@@ -1056,7 +1056,7 @@ static int soc_bind_dai_link(struct snd_soc_card *card,
        if (!rtd->platform) {
                dev_err(card->dev, "ASoC: platform %s not registered\n",
                        dai_link->platform_name);
-               return -EPROBE_DEFER;
+               goto _err_defer;
        }
 
        soc_add_pcm_runtime(card, rtd);
@@ -2083,14 +2083,13 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card)
        /* remove auxiliary devices */
        soc_remove_aux_devices(card);
 
+       snd_soc_dapm_free(&card->dapm);
        soc_cleanup_card_debugfs(card);
 
        /* remove the card */
        if (card->remove)
                card->remove(card);
 
-       snd_soc_dapm_free(&card->dapm);
-
        snd_card_free(card->snd_card);
        return 0;
 
index 8698c26..d908ff8 100644 (file)
@@ -3493,6 +3493,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
        const struct snd_soc_pcm_stream *config = w->params + w->params_select;
        struct snd_pcm_substream substream;
        struct snd_pcm_hw_params *params = NULL;
+       struct snd_pcm_runtime *runtime = NULL;
        u64 fmt;
        int ret;
 
@@ -3541,6 +3542,14 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 
        memset(&substream, 0, sizeof(substream));
 
+       /* Allocate a dummy snd_pcm_runtime for startup() and other ops() */
+       runtime = kzalloc(sizeof(*runtime), GFP_KERNEL);
+       if (!runtime) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       substream.runtime = runtime;
+
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
                substream.stream = SNDRV_PCM_STREAM_CAPTURE;
@@ -3606,6 +3615,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
        }
 
 out:
+       kfree(runtime);
        kfree(params);
        return ret;
 }
index 204cc07..41aa335 100644 (file)
@@ -55,7 +55,6 @@ static int snd_line6_impulse_volume_put(struct snd_kcontrol *kcontrol,
                err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE);
                if (err < 0) {
                        line6pcm->impulse_volume = 0;
-                       line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE);
                        return err;
                }
        } else {
@@ -211,7 +210,9 @@ static void line6_stream_stop(struct snd_line6_pcm *line6pcm, int direction,
        spin_lock_irqsave(&pstr->lock, flags);
        clear_bit(type, &pstr->running);
        if (!pstr->running) {
+               spin_unlock_irqrestore(&pstr->lock, flags);
                line6_unlink_audio_urbs(line6pcm, pstr);
+               spin_lock_irqsave(&pstr->lock, flags);
                if (direction == SNDRV_PCM_STREAM_CAPTURE) {
                        line6pcm->prev_fbuf = NULL;
                        line6pcm->prev_fsize = 0;
index daf81d1..45dd348 100644 (file)
@@ -244,8 +244,8 @@ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value,
 static ssize_t serial_number_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%u\n", pod->serial_number);
 }
@@ -256,8 +256,8 @@ static ssize_t serial_number_show(struct device *dev,
 static ssize_t firmware_version_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100,
                       pod->firmware_version % 100);
@@ -269,8 +269,8 @@ static ssize_t firmware_version_show(struct device *dev,
 static ssize_t device_id_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       struct usb_interface *interface = to_usb_interface(dev);
-       struct usb_line6_pod *pod = usb_get_intfdata(interface);
+       struct snd_card *card = dev_to_snd_card(dev);
+       struct usb_line6_pod *pod = card->private_data;
 
        return sprintf(buf, "%d\n", pod->device_id);
 }
index 6cf1f35..152292e 100644 (file)
@@ -1141,6 +1141,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
        case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
        case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+       case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */
        case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
        case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
        case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
index f209ea1..3051f86 100644 (file)
@@ -87,9 +87,11 @@ struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST     2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST   3
+#define KVM_VGIC_ITS_ADDR_TYPE         4
 
 #define KVM_VGIC_V3_DIST_SIZE          SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE                (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE           (2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF         0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT         1 /* CPU running a 32bit VM */
index 3b8e99e..a2ffec4 100644 (file)
@@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine {
        __u64 fac_list[256];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT   3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS   1024
+#define KVM_S390_VM_CPU_FEAT_ESOP      0
+#define KVM_S390_VM_CPU_FEAT_SIEF2     1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO   2
+#define KVM_S390_VM_CPU_FEAT_SIIF      3
+#define KVM_S390_VM_CPU_FEAT_GPERE     4
+#define KVM_S390_VM_CPU_FEAT_GSLS      5
+#define KVM_S390_VM_CPU_FEAT_IB                6
+#define KVM_S390_VM_CPU_FEAT_CEI       7
+#define KVM_S390_VM_CPU_FEAT_IBS       8
+#define KVM_S390_VM_CPU_FEAT_SKEY      9
+#define KVM_S390_VM_CPU_FEAT_CMMA      10
+#define KVM_S390_VM_CPU_FEAT_PFMFI     11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF    12
+struct kvm_s390_vm_cpu_feat {
+       __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC      4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC                5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+       __u8 plo[32];           /* always */
+       __u8 ptff[16];          /* with TOD-clock steering */
+       __u8 kmac[16];          /* with MSA */
+       __u8 kmc[16];           /* with MSA */
+       __u8 km[16];            /* with MSA */
+       __u8 kimd[16];          /* with MSA */
+       __u8 klmd[16];          /* with MSA */
+       __u8 pckmo[16];         /* with MSA3 */
+       __u8 kmctr[16];         /* with MSA4 */
+       __u8 kmf[16];           /* with MSA4 */
+       __u8 kmo[16];           /* with MSA4 */
+       __u8 pcc[16];           /* with MSA4 */
+       __u8 ppno[16];          /* with MSA5 */
+       __u8 reserved[1824];
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
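
MSB 0 ordering means feature bit n lives in byte n / 8 at bit position 7 - n % 8, matching how the s390 "test bit" style instructions index these blocks (s390 is big-endian, so a byte view of the __u64 array works). A hedged helper sketch, not part of the UAPI:

/* MSB 0: bit 0 is the most significant bit of byte 0 */
static inline int s390_feat_test_bit(const __u8 *block, unsigned int nr)
{
        return (block[nr / 8] >> (7 - (nr % 8))) & 1;
}
/* e.g. nr == KVM_S390_VM_CPU_FEAT_ESOP (0) tests the top bit of the first byte */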
index 8fb5d4a..3ac6343 100644 (file)
        exit_code_ipa0(0xB2, 0x4c, "TAR"),      \
        exit_code_ipa0(0xB2, 0x50, "CSP"),      \
        exit_code_ipa0(0xB2, 0x54, "MVPG"),     \
+       exit_code_ipa0(0xB2, 0x56, "STHYI"),    \
        exit_code_ipa0(0xB2, 0x58, "BSG"),      \
        exit_code_ipa0(0xB2, 0x5a, "BSA"),      \
        exit_code_ipa0(0xB2, 0x5f, "CHSC"),     \
index 448ed96..1c14c25 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * gpio-hammer - example swiss army knife to shake GPIO lines on a system
+ * gpio-event-mon - monitor GPIO line events from userspace
  *
  * Copyright (C) 2016 Linus Walleij
  *
index 0e8a1f7..f39c0e9 100644 (file)
@@ -348,7 +348,7 @@ int main(int argc, char **argv)
        int notrigger = 0;
        char *dummy;
 
-       struct iio_channel_info *channels;
+       struct iio_channel_info *channels = NULL;
 
        register_cleanup();
 
@@ -456,7 +456,7 @@ int main(int argc, char **argv)
 
        if (notrigger) {
                printf("trigger-less mode selected\n");
-       } if (trig_num >= 0) {
+       } else if (trig_num >= 0) {
                char *trig_dev_name;
                ret = asprintf(&trig_dev_name, "%strigger%d", iio_dir, trig_num);
                if (ret < 0) {
index b968794..f436d24 100644 (file)
@@ -8,7 +8,11 @@ void *memdup(const void *src, size_t len);
 
 int strtobool(const char *s, bool *res);
 
-#ifdef __GLIBC__
+/*
+ * glibc-based builds need the extern while uClibc doesn't.
+ * However, uClibc headers also define __GLIBC__, hence the hack below.
+ */
+#if defined(__GLIBC__) && !defined(__UCLIBC__)
 extern size_t strlcpy(char *dest, const char *src, size_t size);
 #endif
 
index 8d4dc97..35745a7 100644 (file)
@@ -97,6 +97,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
        }
 }
 
+#ifdef HAVE_LIBELF_SUPPORT
 void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
                                           int ntevs)
 {
@@ -118,5 +119,6 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
                }
        }
 }
+#endif /* HAVE_LIBELF_SUPPORT */
 
 #endif
index fb51457..a2412e9 100644 (file)
@@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
-       bool have_timing_info;
+       bool have_timing_info, need_immediate = false;
        struct perf_evsel *evsel, *intel_pt_evsel = NULL;
        const struct cpu_map *cpus = evlist->cpus;
        bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
@@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                                ptr->have_sched_switch = 3;
                        } else {
                                opts->record_switch_events = true;
+                               need_immediate = true;
                                if (cpu_wide)
                                        ptr->have_sched_switch = 3;
                                else
@@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                tracking_evsel->attr.freq = 0;
                tracking_evsel->attr.sample_period = 1;
 
+               if (need_immediate)
+                       tracking_evsel->immediate = true;
+
                /* In per-cpu case, always need the time of mmap events etc */
                if (!cpu_map__empty(cpus)) {
                        perf_evsel__set_sample_bit(tracking_evsel, TIME);
index d608a2c..d1ce29b 100644 (file)
@@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        if (mem->operation & MEM_OPERATION_LOAD)
                perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
 
+       if (mem->operation & MEM_OPERATION_STORE)
+               perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
        if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
                rec_argv[i++] = "-W";
 
index 9c640a8..c859e59 100644 (file)
@@ -371,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session)
 
        if (!no_callchain) {
                bool use_callchain = false;
+               bool not_pipe = false;
 
                evlist__for_each_entry(session->evlist, evsel) {
+                       not_pipe = true;
                        if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                                use_callchain = true;
                                break;
                        }
                }
-               if (!use_callchain)
+               if (not_pipe && !use_callchain)
                        symbol_conf.use_callchain = false;
        }
 
@@ -1690,8 +1692,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
        snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
        scripts_dir = opendir(scripts_path);
-       if (!scripts_dir)
-               return -1;
+       if (!scripts_dir) {
+               fprintf(stdout,
+                       "open(%s) failed.\n"
+                       "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+                       scripts_path);
+               exit(-1);
+       }
 
        for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
index d9b80ef..21fd573 100644 (file)
@@ -507,17 +507,17 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
        u8 op, result, type = (config >>  0) & 0xff;
        const char *err = "unknown-ext-hardware-cache-type";
 
-       if (type > PERF_COUNT_HW_CACHE_MAX)
+       if (type >= PERF_COUNT_HW_CACHE_MAX)
                goto out_err;
 
        op = (config >>  8) & 0xff;
        err = "unknown-ext-hardware-cache-op";
-       if (op > PERF_COUNT_HW_CACHE_OP_MAX)
+       if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
                goto out_err;
 
        result = (config >> 16) & 0xff;
        err = "unknown-ext-hardware-cache-result";
-       if (result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+       if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                goto out_err;
 
        err = "invalid-cache";
index 9c8f15d..8ff6c6a 100644 (file)
@@ -123,8 +123,6 @@ struct intel_pt_decoder {
        bool have_calc_cyc_to_tsc;
        int exec_mode;
        unsigned int insn_bytes;
-       uint64_t sign_bit;
-       uint64_t sign_bits;
        uint64_t period;
        enum intel_pt_period_type period_type;
        uint64_t tot_insn_cnt;
@@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
        decoder->data               = params->data;
        decoder->return_compression = params->return_compression;
 
-       decoder->sign_bit           = (uint64_t)1 << 47;
-       decoder->sign_bits          = ~(((uint64_t)1 << 48) - 1);
-
        decoder->period             = params->period;
        decoder->period_type        = params->period_type;
 
@@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen)
        return 0;
 }
 
-static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
-                                const struct intel_pt_pkt *packet,
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
                                 uint64_t last_ip)
 {
        uint64_t ip;
 
        switch (packet->count) {
-       case 2:
+       case 1:
                ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
                     packet->payload;
                break;
-       case 4:
+       case 2:
                ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
                     packet->payload;
                break;
+       case 3:
+               ip = packet->payload;
+               /* Sign-extend 6-byte ip */
+               if (ip & (uint64_t)0x800000000000ULL)
+                       ip |= (uint64_t)0xffff000000000000ULL;
+               break;
+       case 4:
+               ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+                    packet->payload;
+               break;
        case 6:
                ip = packet->payload;
                break;
@@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
                return 0;
        }
 
-       if (ip & decoder->sign_bit)
-               return ip | decoder->sign_bits;
-
        return ip;
 }
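
The new case 3 sign-extends the 48-bit IP payload inline, replacing the decoder-wide sign_bit/sign_bits fields removed above, which had applied the extension to every payload variant rather than only this one. A standalone sketch of the 48-bit sign extension (function name mine):

        #include <stdint.h>
        #include <stdio.h>

        static uint64_t sext48(uint64_t ip)
        {
                if (ip & (1ULL << 47))                  /* bit 47 is the sign bit */
                        ip |= 0xffff000000000000ULL;    /* propagate into 63:48 */
                return ip;
        }

        int main(void)
        {
                /* prints ffff800000000000: an upper-half (kernel) address */
                printf("%016llx\n", (unsigned long long)sext48(0x800000000000ULL));
                return 0;
        }
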
 
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
-       decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
-                                           decoder->last_ip);
+       decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -1657,6 +1657,12 @@ next:
        }
 }
 
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+       return decoder->last_ip || decoder->packet.count == 0 ||
+              decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
 /* Walk PSB+ packets to get in sync. */
 static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 {
@@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 
                case INTEL_PT_FUP:
                        decoder->pge = true;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0) {
+                       if (intel_pt_have_ip(decoder)) {
                                uint64_t current_ip = decoder->ip;
 
                                intel_pt_set_ip(decoder);
@@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
                case INTEL_PT_TIP_PGE:
                case INTEL_PT_TIP:
                        decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0)
+                       if (intel_pt_have_ip(decoder))
                                intel_pt_set_ip(decoder);
                        if (decoder->ip)
                                return 0;
@@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 
                case INTEL_PT_FUP:
                        if (decoder->overflow) {
-                               if (decoder->last_ip ||
-                                   decoder->packet.count == 6 ||
-                                   decoder->packet.count == 0)
+                               if (intel_pt_have_ip(decoder))
                                        intel_pt_set_ip(decoder);
                                if (decoder->ip)
                                        return 0;
index b1257c8..4f7b320 100644 (file)
@@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
                           const unsigned char *buf, size_t len,
                           struct intel_pt_pkt *packet)
 {
-       switch (byte >> 5) {
+       int ip_len;
+
+       packet->count = byte >> 5;
+
+       switch (packet->count) {
        case 0:
-               packet->count = 0;
+               ip_len = 0;
                break;
        case 1:
                if (len < 3)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 2;
+               ip_len = 2;
                packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
                break;
        case 2:
                if (len < 5)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 4;
+               ip_len = 4;
                packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
                break;
        case 3:
-       case 6:
+       case 4:
                if (len < 7)
                        return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 6;
+               ip_len = 6;
                memcpy_le64(&packet->payload, buf + 1, 6);
                break;
+       case 6:
+               if (len < 9)
+                       return INTEL_PT_NEED_MORE_BYTES;
+               ip_len = 8;
+               packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+               break;
        default:
                return INTEL_PT_BAD_PACKET;
        }
 
        packet->type = type;
 
-       return packet->count + 1;
+       return ip_len + 1;
 }
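
With the packet's IPBytes field (byte >> 5) now stored in packet->count verbatim, the payload length becomes a separately derived value. A condensed restatement of the mapping the switch above implements (helper name mine):

        /* IPBytes -> payload length:
         *   0 -> 0 bytes (IP suppressed)
         *   1 -> 2 bytes (IP[15:0], upper bits from the last IP)
         *   2 -> 4 bytes (IP[31:0], upper bits from the last IP)
         *   3 -> 6 bytes (IP[47:0], sign-extended)
         *   4 -> 6 bytes (IP[47:0], IP[63:48] from the last IP)
         *   6 -> 8 bytes (full 64-bit IP)
         * 5 and 7 fall through to INTEL_PT_BAD_PACKET above.
         */
        static int ip_len_from_ipbytes(unsigned int ipbytes)
        {
                switch (ipbytes) {
                case 0: return 0;
                case 1: return 2;
                case 2: return 4;
                case 3: return 6;
                case 4: return 6;
                case 6: return 8;
                default: return -1;     /* reserved encoding */
                }
        }
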
 
 static int intel_pt_get_mode(const unsigned char *buf, size_t len,
index 9f3305f..95f0884 100644 (file)
@@ -1,3 +1,4 @@
+#include <sys/sysmacros.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
index 9aed9c3..9c3b9ed 100644 (file)
@@ -133,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag)
 /* Get raw string list of current kprobe_events or uprobe_events */
 struct strlist *probe_file__get_rawlist(int fd)
 {
-       int ret, idx;
+       int ret, idx, fddup;
        FILE *fp;
        char buf[MAX_CMDLEN];
        char *p;
@@ -143,8 +143,17 @@ struct strlist *probe_file__get_rawlist(int fd)
                return NULL;
 
        sl = strlist__new(NULL, NULL);
+       if (sl == NULL)
+               return NULL;
+
+       fddup = dup(fd);
+       if (fddup < 0)
+               goto out_free_sl;
+
+       fp = fdopen(fddup, "r");
+       if (!fp)
+               goto out_close_fddup;
 
-       fp = fdopen(dup(fd), "r");
        while (!feof(fp)) {
                p = fgets(buf, MAX_CMDLEN, fp);
                if (!p)
@@ -156,13 +165,21 @@ struct strlist *probe_file__get_rawlist(int fd)
                ret = strlist__add(sl, buf);
                if (ret < 0) {
                        pr_debug("strlist__add failed (%d)\n", ret);
-                       strlist__delete(sl);
-                       return NULL;
+                       goto out_close_fp;
                }
        }
        fclose(fp);
 
        return sl;
+
+out_close_fp:
+       fclose(fp);
+       goto out_free_sl;
+out_close_fddup:
+       close(fddup);
+out_free_sl:
+       strlist__delete(sl);
+       return NULL;
 }
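
Both this function and probe_cache__load() below get the same treatment: fdopen() transfers ownership of the descriptor to the FILE, so the fd is dup()'d first; previously a failed dup() was fed straight into fdopen() and the resulting NULL FILE was never checked. A self-contained sketch of the pattern (helper name mine):

        #include <stdio.h>
        #include <unistd.h>

        static FILE *fdopen_dup(int fd, const char *mode)
        {
                FILE *fp;
                int fddup = dup(fd);    /* keep the caller's fd away from fclose() */

                if (fddup < 0)
                        return NULL;
                fp = fdopen(fddup, mode);
                if (!fp)
                        close(fddup);   /* fdopen() failed; we still own fddup */
                return fp;
        }
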
 
 static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
@@ -447,12 +464,17 @@ static int probe_cache__load(struct probe_cache *pcache)
 {
        struct probe_cache_entry *entry = NULL;
        char buf[MAX_CMDLEN], *p;
-       int ret = 0;
+       int ret = 0, fddup;
        FILE *fp;
 
-       fp = fdopen(dup(pcache->fd), "r");
-       if (!fp)
+       fddup = dup(pcache->fd);
+       if (fddup < 0)
+               return -errno;
+       fp = fdopen(fddup, "r");
+       if (!fp) {
+               close(fddup);
                return -EINVAL;
+       }
 
        while (!feof(fp)) {
                if (!fgets(buf, MAX_CMDLEN, fp))
index a34321e..a811c13 100644 (file)
@@ -837,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
        sec = syms_ss->symtab;
        shdr = syms_ss->symshdr;
 
-       if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+       if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+                               ".text", NULL))
                dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
 
        if (runtime_ss->opdsec)
index cf5e250..783a53f 100644 (file)
@@ -66,7 +66,7 @@ static int entry(u64 ip, struct unwind_info *ui)
        if (__report_module(&al, ip, ui))
                return -1;
 
-       e->ip  = ip;
+       e->ip  = al.addr;
        e->map = al.map;
        e->sym = al.sym;
 
index 97c0f8f..20c2e57 100644 (file)
@@ -542,7 +542,7 @@ static int entry(u64 ip, struct thread *thread,
        thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
                                   MAP__FUNCTION, ip, &al);
 
-       e.ip = ip;
+       e.ip = al.addr;
        e.map = al.map;
        e.sym = al.sym;
 
index 4fde8c7..77e6ccf 100644 (file)
@@ -33,6 +33,7 @@
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -365,7 +366,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void kvm_timer_init_interrupt(void *info)
 {
-       enable_percpu_irq(host_vtimer_irq, 0);
+       enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +433,14 @@ int kvm_timer_hyp_init(void)
        }
        host_vtimer_irq = info->virtual_irq;
 
+       host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+       if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+           host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+               kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+                       host_vtimer_irq);
+               host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+       }
+
        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
index 07411cf..4660a7d 100644 (file)
@@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 
        irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
        if (!irq)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&irq->lpi_list);
        INIT_LIST_HEAD(&irq->ap_list);
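
Returning ERR_PTR(-ENOMEM) instead of NULL lets the caller distinguish allocation failure from other outcomes, following the kernel convention of encoding a small negative errno in the pointer value itself. The relevant helpers, simplified from include/linux/err.h:

        #define MAX_ERRNO       4095

        static inline void *ERR_PTR(long error)
        {
                return (void *)error;           /* e.g. (void *)-ENOMEM */
        }

        static inline long PTR_ERR(const void *ptr)
        {
                return (long)ptr;
        }

        static inline int IS_ERR(const void *ptr)
        {
                return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
        }
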
@@ -441,39 +441,63 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
  * Find the target VCPU and the LPI number for a given devid/eventid pair
  * and make this IRQ pending, possibly injecting it.
  * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for ITS-mapping-related
+ * errors, and a negative error value for generic errors.
  */
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
-                                u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+                               u32 devid, u32 eventid)
 {
+       struct kvm_vcpu *vcpu;
        struct its_itte *itte;
 
        if (!its->enabled)
-               return;
+               return -EBUSY;
 
        itte = find_itte(its, devid, eventid);
-       /* Triggering an unmapped IRQ gets silently dropped. */
-       if (itte && its_is_collection_mapped(itte->collection)) {
-               struct kvm_vcpu *vcpu;
-
-               vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
-               if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
-                       spin_lock(&itte->irq->irq_lock);
-                       itte->irq->pending = true;
-                       vgic_queue_irq_unlock(kvm, itte->irq);
-               }
-       }
+       if (!itte || !its_is_collection_mapped(itte->collection))
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+       if (!vcpu)
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       if (!vcpu->arch.vgic_cpu.lpis_enabled)
+               return -EBUSY;
+
+       spin_lock(&itte->irq->irq_lock);
+       itte->irq->pending = true;
+       vgic_queue_irq_unlock(kvm, itte->irq);
+
+       return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+       struct vgic_io_device *iodev;
+
+       if (dev->ops != &kvm_io_gic_ops)
+               return NULL;
+
+       iodev = container_of(dev, struct vgic_io_device, dev);
+
+       if (iodev->iodev_type != IODEV_ITS)
+               return NULL;
+
+       return iodev;
 }
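
vgic_get_its_iodev() exists because kvm_io_bus_get_dev() can return any device on the bus; container_of() on a device that is not actually embedded in a vgic_io_device would yield a garbage pointer. A generic sketch of the "verify the ops pointer before downcasting" idiom (types hypothetical):

        #include <stddef.h>

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct io_ops { int (*read)(void); };
        struct io_device { const struct io_ops *ops; };
        struct my_device { int type; struct io_device dev; };

        static const struct io_ops my_ops;      /* stands in for kvm_io_gic_ops */

        static struct my_device *to_my_device(struct io_device *dev)
        {
                if (dev->ops != &my_ops)        /* not ours: the cast is invalid */
                        return NULL;
                return container_of(dev, struct my_device, dev);
        }
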
 
 /*
  * Queries the KVM IO bus framework to get the ITS pointer from the given
  * doorbell address.
  * We then call vgic_its_trigger_msi() with the decoded data.
+ * Per the KVM_SIGNAL_MSI API description, this returns 1 on success.
  */
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
        u64 address;
        struct kvm_io_device *kvm_io_dev;
        struct vgic_io_device *iodev;
+       int ret;
 
        if (!vgic_has_its(kvm))
                return -ENODEV;
@@ -485,15 +509,28 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 
        kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
        if (!kvm_io_dev)
-               return -ENODEV;
+               return -EINVAL;
 
-       iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+       iodev = vgic_get_its_iodev(kvm_io_dev);
+       if (!iodev)
+               return -EINVAL;
 
        mutex_lock(&iodev->its->its_lock);
-       vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+       ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
        mutex_unlock(&iodev->its->its_lock);
 
-       return 0;
+       if (ret < 0)
+               return ret;
+
+       /*
+        * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+        * if the guest blocked the MSI, so map any LPI-mapping-related
+        * error to 0 here.
+        */
+       if (ret)
+               return 0;
+       else
+               return 1;
 }
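
The tail of vgic_its_inject_msi() folds the three-way result of vgic_its_trigger_msi() into the KVM_SIGNAL_MSI contract. Condensed into one helper (name mine):

        /* vgic_its_trigger_msi() result -> KVM_SIGNAL_MSI return value:
         *   ret < 0:  generic error, propagated as-is
         *   ret > 0:  ITS mapping error, reported as 0 ("guest blocked the MSI")
         *   ret == 0: delivered, reported as 1
         */
        static int to_signal_msi_ret(int ret)
        {
                if (ret < 0)
                        return ret;
                return ret ? 0 : 1;
        }
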
 
 /* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
        list_del(&itte->itte_list);
 
        /* This put matches the get in vgic_add_lpi. */
-       vgic_put_irq(kvm, itte->irq);
+       if (itte->irq)
+               vgic_put_irq(kvm, itte->irq);
 
        kfree(itte);
 }
@@ -697,6 +735,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
        struct its_device *device;
        struct its_collection *collection, *new_coll = NULL;
        int lpi_nr;
+       struct vgic_irq *irq;
 
        device = find_its_device(its, device_id);
        if (!device)
@@ -710,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
            lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
                return E_ITS_MAPTI_PHYSICALID_OOR;
 
+       /* If there is an existing mapping, behavior is UNPREDICTABLE. */
+       if (find_itte(its, device_id, event_id))
+               return 0;
+
        collection = find_collection(its, coll_id);
        if (!collection) {
                int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
                new_coll = collection;
        }
 
-       itte = find_itte(its, device_id, event_id);
+       itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
        if (!itte) {
-               itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-               if (!itte) {
-                       if (new_coll)
-                               vgic_its_free_collection(its, coll_id);
-                       return -ENOMEM;
-               }
-
-               itte->event_id  = event_id;
-               list_add_tail(&itte->itte_list, &device->itt_head);
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               return -ENOMEM;
        }
 
+       itte->event_id  = event_id;
+       list_add_tail(&itte->itte_list, &device->itt_head);
+
        itte->collection = collection;
        itte->lpi = lpi_nr;
-       itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+       irq = vgic_add_lpi(kvm, lpi_nr);
+       if (IS_ERR(irq)) {
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               its_free_itte(kvm, itte);
+               return PTR_ERR(irq);
+       }
+       itte->irq = irq;
+
        update_affinity_itte(kvm, itte);
 
        /*
@@ -981,9 +1030,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
        u32 msi_data = its_cmd_get_id(its_cmd);
        u64 msi_devid = its_cmd_get_deviceid(its_cmd);
 
-       vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
-       return 0;
+       return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
 }
 
 /*
@@ -1288,13 +1335,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
                its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 {
        struct vgic_io_device *iodev = &its->iodev;
        int ret;
 
-       if (its->initialized)
-               return 0;
+       if (!its->initialized)
+               return -EBUSY;
 
        if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
                return -ENXIO;
@@ -1311,9 +1358,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
                                      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
        mutex_unlock(&kvm->slots_lock);
 
-       if (!ret)
-               its->initialized = true;
-
        return ret;
 }
 
@@ -1435,9 +1479,6 @@ static int vgic_its_set_attr(struct kvm_device *dev,
                if (type != KVM_VGIC_ITS_ADDR_TYPE)
                        return -ENODEV;
 
-               if (its->initialized)
-                       return -EBUSY;
-
                if (copy_from_user(&addr, uaddr, sizeof(addr)))
                        return -EFAULT;
 
@@ -1453,7 +1494,9 @@ static int vgic_its_set_attr(struct kvm_device *dev,
        case KVM_DEV_ARM_VGIC_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_ARM_VGIC_CTRL_INIT:
-                       return vgic_its_init_its(dev->kvm, its);
+                       its->initialized = true;
+
+                       return 0;
                }
                break;
        }
@@ -1498,3 +1541,30 @@ int kvm_vgic_register_its_device(void)
        return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
                                       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       struct kvm_device *dev;
+       int ret = 0;
+
+       list_for_each_entry(dev, &kvm->devices, vm_node) {
+               if (dev->ops != &kvm_arm_vgic_its_ops)
+                       continue;
+
+               ret = vgic_register_its_iodev(kvm, dev->private);
+               if (ret)
+                       return ret;
+               /*
+                * We don't need to care about tearing down previously
+                * registered ITSes, as the kvm_io_bus framework removes
+                * them for us if the VM gets destroyed.
+                */
+       }
+
+       return ret;
+}
index ff668e0..90d8181 100644 (file)
@@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 propbaser = dist->propbaser;
+       u64 old_propbaser, propbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
-       propbaser = vgic_sanitise_propbaser(propbaser);
-
-       dist->propbaser = propbaser;
+       do {
+               old_propbaser = dist->propbaser;
+               propbaser = old_propbaser;
+               propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+               propbaser = vgic_sanitise_propbaser(propbaser);
+       } while (cmpxchg64(&dist->propbaser, old_propbaser,
+                          propbaser) != old_propbaser);
 }
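
The PROPBASER write (and the PENDBASER write in the next hunk) becomes a compare-and-swap retry loop so that two VCPUs writing different halves of the 64-bit register concurrently cannot lose each other's update. The same shape in portable C11 atomics, as a sketch with the update/sanitise steps folded into a mask:

        #include <stdatomic.h>
        #include <stdint.h>

        static void update_baser(_Atomic uint64_t *reg, uint64_t mask,
                                 uint64_t val)
        {
                uint64_t old = atomic_load(reg);
                uint64_t new;

                do {
                        /* recompute from the freshest snapshot on each retry */
                        new = (old & ~mask) | (val & mask);
                } while (!atomic_compare_exchange_weak(reg, &old, new));
        }
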
 
 static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
                                     unsigned long val)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 pendbaser = vgic_cpu->pendbaser;
+       u64 old_pendbaser, pendbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
-       pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
-       vgic_cpu->pendbaser = pendbaser;
+       do {
+               old_pendbaser = vgic_cpu->pendbaser;
+               pendbaser = old_pendbaser;
+               pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+               pendbaser = vgic_sanitise_pendbaser(pendbaser);
+       } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+                          pendbaser) != old_pendbaser);
 }
 
 /*
index 0506543..9f0dae3 100644 (file)
@@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm)
                goto out;
        }
 
+       if (vgic_has_its(kvm)) {
+               ret = vgic_register_its_iodevs(kvm);
+               if (ret) {
+                       kvm_err("Unable to register VGIC ITS MMIO regions\n");
+                       goto out;
+               }
+       }
+
        dist->ready = true;
 
 out:
index e7aeac7..e83b7fe 100644 (file)
@@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref)
 
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
-       struct vgic_dist *dist;
+       struct vgic_dist *dist = &kvm->arch.vgic;
 
        if (irq->intid < VGIC_MIN_LPI)
                return;
 
-       if (!kref_put(&irq->refcount, vgic_irq_release))
+       spin_lock(&dist->lpi_list_lock);
+       if (!kref_put(&irq->refcount, vgic_irq_release)) {
+               spin_unlock(&dist->lpi_list_lock);
                return;
+       }
 
-       dist = &kvm->arch.vgic;
-
-       spin_lock(&dist->lpi_list_lock);
        list_del(&irq->lpi_list);
        dist->lpi_list_count--;
        spin_unlock(&dist->lpi_list_lock);
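
Taking lpi_list_lock before the kref_put() closes a race: previously the refcount could drop to zero while a concurrent LPI lookup walking the list re-took a reference to the dying object before it was unlinked. A standalone pthread sketch of "the last put unlinks under the list lock" (structure names mine):

        #include <pthread.h>
        #include <stdlib.h>

        struct obj {
                int refcount;           /* protected by list_lock in this sketch */
                struct obj *next;
        };

        static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
        static struct obj *list_head;

        static void obj_put(struct obj *o)
        {
                pthread_mutex_lock(&list_lock);
                if (--o->refcount > 0) {
                        pthread_mutex_unlock(&list_lock);
                        return;
                }
                /* refcount hit zero: unlink before anyone can find it again */
                for (struct obj **p = &list_head; *p; p = &(*p)->next) {
                        if (*p == o) {
                                *p = o->next;
                                break;
                        }
                }
                pthread_mutex_unlock(&list_lock);
                free(o);
        }
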
index 1d8e21d..6c4625c 100644 (file)
@@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm,
        return -ENODEV;
 }
 
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       return -ENODEV;
+}
+
 static inline bool vgic_has_its(struct kvm *kvm)
 {
        return false;